// Documentation: https://github.com/hooke007/MPV_lazy/wiki/4_GLSL

// CuNNy 4x16 DS
// Copyright (c) 2024 funnyplanter

// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3.0 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program.  If not, see <https://www.gnu.org/licenses/>.
/* ------------------------------------------------------------------- */


//!DESC [CuNNy_4x16_DS] -in
//!HOOK LUMA
//!COMPUTE 16 16 8 8
//!BIND LUMA
//!SAVE in
//!WIDTH LUMA.w 2 *
//!HEIGHT LUMA.h 2 *
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Pass "-in": binds LUMA and saves a 4-component texture named `in`, declared
// by the directives above at twice the LUMA width and height.
// NOTE: keep comments out of the directive run above; exact directive
// semantics are defined by the mpv user-shader documentation.

// Request 16-bit float types. When the extension macro is defined the
// V4/M4/F aliases map to half-precision types, otherwise to 32-bit ones.
// M4 is defined here but not referenced by this pass's hook().
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0(x, y): red channel of the LUMA texel at `pos + (x, y)`, with the
// coordinate clamped to [0, sz], scaled by LUMA_mul and converted to F.
// The macro expands `pos` and `sz` by name, so both must be in scope at the
// call site. The `* ivec2(1, 1) + ivec2(0, 0)` part is an identity transform.
#define l0(x, y) F((LUMA_mul * texelFetch(LUMA_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(1, 1) + ivec2(0, 0), 0)).r)
// Work-group shared tile of LUMA samples: one 10x10 plane, indexed
// G[plane][row][col]. hook() fills it with offsets of -1 and reads it at
// +0..+2, i.e. an 8x8 tile plus a one-texel border on every side.
shared F G[1][10][10];
// Entry point of the "-in" pass.
//
// Each invocation is responsible for one LUMA texel (`pos`). It combines the
// 3x3 neighbourhood of LUMA samples around that texel with constant
// coefficient vectors (presumably trained network weights), producing four
// 4-component results r0..r3. Each result gets a constant offset, is clamped
// to [0, 1] and is written to one texel of the 2x2 block of out_image that
// starts at `opos`.
void hook() {
	// Position of this invocation inside its work group.
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	// LUMA texel handled by this invocation (work groups advance by 8x8).
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	// Top-left output texel of the 2x2 block written by this invocation.
	ivec2 opos = pos * ivec2(2, 2);
	// Largest valid LUMA texel index; upper clamp bound used by l0().
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	// Cooperative load of the 10x10 shared tile. y and x take the values 0 and
	// 8, so every invocation loads cell (xy.y, xy.x) and those with a local
	// index below 2 additionally load the cells in rows/columns 8..9.
	// Cell [ay][ax] receives the sample at (work-group origin) + (ax - 1, ay - 1).
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
		}
	}
	// Wait until the whole work group has finished filling the tile.
	barrier();
	// s0_R_C is the sample at row offset R and column offset C of the 3x3
	// window; s0_1_1 is the sample at `pos` itself.
	F s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2;
	// Four accumulators of four components each (16 values per LUMA texel).
	V4 r0, r1, r2, r3;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0); r3 = V4(0.0);
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2];
	// For each of the nine taps, add (constant vector * scalar sample) to every
	// accumulator. Statement order is kept as generated.
	r0 += V4(1.788e-02, 9.228e-02, 1.276e-02, -1.347e-02) * s0_0_0;
	r1 += V4(1.482e-02, -2.908e-02, 7.169e-02, -4.113e-02) * s0_0_0;
	r2 += V4(1.425e-02, 9.581e-02, 7.619e-02, -3.136e-02) * s0_0_0;
	r3 += V4(-6.137e-01, -2.612e-02, -5.735e-02, -2.123e-02) * s0_0_0;
	r0 += V4(2.841e-01, 6.152e-01, 1.061e-01, 4.163e-02) * s0_0_1;
	r1 += V4(-2.933e-02, -7.730e-03, -8.994e-02, -1.130e-01) * s0_0_1;
	r2 += V4(6.252e-01, 2.783e-01, 1.036e-01, -7.267e-02) * s0_0_1;
	r3 += V4(4.932e-02, -1.178e-01, -7.147e-03, -6.465e-01) * s0_0_1;
	r0 += V4(6.771e-01, 4.364e-02, 1.024e-01, -2.629e-02) * s0_0_2;
	r1 += V4(6.176e-03, -2.737e-03, -3.966e-04, 1.380e-01) * s0_0_2;
	r2 += V4(-2.149e-02, 1.072e-01, 3.302e-03, 1.145e-01) * s0_0_2;
	r3 += V4(2.251e-02, 5.900e-03, -8.374e-02, 5.348e-03) * s0_0_2;
	r0 += V4(-2.682e-02, -4.554e-02, -5.527e-02, -1.278e-02) * s0_1_0;
	r1 += V4(1.062e+00, 5.409e-02, 3.330e-01, -3.386e-01) * s0_1_0;
	r2 += V4(-6.352e-01, -2.680e-02, -2.086e+00, -3.117e-01) * s0_1_0;
	r3 += V4(4.234e-02, -2.371e-01, 9.448e-02, 5.827e-02) * s0_1_0;
	r0 += V4(-2.582e-01, -5.810e-01, -9.931e-02, 6.215e-01) * s0_1_1;
	r1 += V4(1.461e-02, -1.327e-01, -4.055e-01, -4.580e-01) * s0_1_1;
	r2 += V4(8.506e-03, -7.773e-01, 8.033e-02, -4.596e-01) * s0_1_1;
	r3 += V4(5.438e-01, -2.400e-01, 6.334e-01, 6.297e-01) * s0_1_1;
	r0 += V4(-5.957e-01, -2.832e-02, -2.785e-01, -5.958e-01) * s0_1_2;
	r1 += V4(-1.624e-02, 3.387e-01, -6.068e-03, 8.204e-01) * s0_1_2;
	r2 += V4(9.166e-03, 1.604e-01, -2.713e-02, 7.559e-01) * s0_1_2;
	r3 += V4(-4.210e-02, -1.626e-01, -3.933e-01, -3.854e-02) * s0_1_2;
	r0 += V4(-1.214e-03, -3.737e-02, -3.454e-02, 2.390e-02) * s0_2_0;
	r1 += V4(-3.435e-03, 1.213e-02, 1.004e-01, 2.782e-03) * s0_2_0;
	r2 += V4(1.367e-02, -2.685e-02, 5.416e-02, -1.064e-03) * s0_2_0;
	r3 += V4(-1.073e-04, 6.851e-02, -3.380e-02, -4.146e-02) * s0_2_0;
	r0 += V4(2.554e-02, -3.787e-02, 1.072e-01, -3.989e-02) * s0_2_1;
	r1 += V4(-2.676e-02, -1.329e-02, 3.959e-03, -1.080e-01) * s0_2_1;
	r2 += V4(-1.568e-02, 7.774e-02, -4.529e-02, -8.814e-02) * s0_2_1;
	r3 += V4(-1.695e-02, 6.310e-01, -1.190e-02, 4.941e-02) * s0_2_1;
	r0 += V4(-1.136e-01, -2.241e-02, 1.603e-01, 5.764e-03) * s0_2_2;
	r1 += V4(6.791e-03, -7.158e-02, -1.655e-02, 9.945e-02) * s0_2_2;
	r2 += V4(2.137e-03, 3.491e-03, 8.381e-02, 9.264e-02) * s0_2_2;
	r3 += V4(1.743e-02, 9.058e-02, -1.358e-01, 9.263e-03) * s0_2_2;
	// Per accumulator: add a constant offset, clamp to [0, 1], then store as a
	// vec4. r0 -> (0, 0), r1 -> (1, 0), r2 -> (0, 1), r3 -> (1, 1) relative to opos.
	r0 += V4(1.632e-02, -1.462e-02, 4.336e-03, 1.858e-03);
	r0 = clamp(r0, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 += V4(-1.004e+00, -1.225e-03, 2.959e-02, 1.617e-02);
	r1 = clamp(r1, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 += V4(5.447e-06, 2.543e-03, 4.396e-02, -1.157e-02);
	r2 = clamp(r2, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 1), vec4(r2));
	r3 += V4(-1.267e-02, 1.800e-02, 2.404e-02, 5.023e-03);
	r3 = clamp(r3, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 1), vec4(r3));
}

//!DESC [CuNNy_4x16_DS] -conv1
//!HOOK LUMA
//!COMPUTE 16 16 8 8
//!BIND in
//!BIND LUMA
//!SAVE conv1
//!WIDTH LUMA.w 2 *
//!HEIGHT LUMA.h 2 *
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Pass "-conv1": binds the `in` texture and LUMA and saves a 4-component
// texture named `conv1`, declared by the directives above at twice the LUMA
// width and height.
// NOTE: keep comments out of the directive run above; exact directive
// semantics are defined by the mpv user-shader documentation.

// Request 16-bit float types. When the extension macro is defined the
// V4/M4/F aliases map to half-precision types, otherwise to 32-bit ones.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// Work-group shared tile: four 10x10 planes of 4-component vectors, indexed
// G[plane][row][col]. The hook() that follows fills planes 0..3 from the
// .w, .z, .x and .y components (respectively) of four gathers on `in`.
shared V4 G[4][10][10];
void hook() {
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	ivec2 opos = pos * ivec2(2, 2);
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			vec2 p;
			p = vec2(clamp(pos + ivec2(x - 1, y - 1), ivec2(0), sz) * ivec2(2, 2) + ivec2(1, 1)) * in_pt;
			V4 sr0 = V4(in_gather(p, 0));
			V4 sg0 = V4(in_gather(p, 1));
			V4 sb0 = V4(in_gather(p, 2));
			V4 sa0 = V4(in_gather(p, 3));
			G[0][ay][ax] = V4(sr0.w, sg0.w, sb0.w, sa0.w);
			G[1][ay][ax] = V4(sr0.z, sg0.z, sb0.z, sa0.z);
			G[2][ay][ax] = V4(sr0.x, sg0.x, sb0.x, sa0.x);
			G[3][ay][ax] = V4(sr0.y, sg0.y, sb0.y, sa0.y);
		}
	}
	barrier();
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	V4 r0, r1, r2, r3;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0); r3 = V4(0.0);
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	r0 += M4(4.955e-01, 1.540e-01, -3.637e-01, -1.822e-01, -6.262e-01, -7.369e-03, 8.398e-02, 2.164e-01, 1.051e-01, -1.191e-01, -5.353e-01, -4.964e-01, -5.378e-01, -4.252e-02, -3.228e-01, 3.043e-01) * s0_0_0;
	r1 += M4(-1.747e-01, -2.927e-01, 1.955e-02, -2.476e-01, 2.489e-02, -2.445e-02, 8.711e-02, 2.222e-01, -1.233e-01, -2.607e-01, 1.659e-01, 3.049e-01, 1.858e-01, -3.242e-01, 5.038e-02, 7.401e-02) * s0_0_0;
	r2 += M4(1.985e-02, -3.809e-01, -4.810e-02, 4.887e-02, 1.453e-01, 2.701e-01, 3.331e-01, -4.874e-03, -3.367e-01, -3.995e-01, -4.612e-01, -3.802e-02, -9.937e-02, -5.475e-01, 2.627e-01, -1.220e-01) * s0_0_0;
	r3 += M4(2.055e-01, -7.652e-02, -1.320e-01, 5.309e-01, 2.878e-01, 1.909e-01, 4.626e-02, -1.198e-01, -3.079e-01, -1.366e-01, -1.779e-02, -3.945e-01, -6.402e-02, 2.203e-01, 4.775e-01, -6.195e-03) * s0_0_0;
	r0 += M4(-2.511e-01, 6.156e-02, -2.128e-01, 3.300e-01, -5.792e-02, -2.075e-01, 8.000e-01, 4.108e-01, -1.182e-01, 8.709e-03, -1.666e-01, -1.177e-01, 6.497e-01, -4.121e-02, -2.255e-02, -4.467e-01) * s0_0_1;
	r1 += M4(1.761e-01, -4.600e-01, 1.187e-02, 2.009e-01, 2.316e-01, 8.417e-01, 4.568e-02, 4.944e-02, -6.885e-02, -3.893e-01, -1.335e-01, -2.662e-01, -2.575e-01, 1.442e-01, 9.203e-02, 3.761e-01) * s0_0_1;
	r2 += M4(7.737e-02, 1.774e-03, -1.709e-01, -8.222e-02, -8.268e-02, 1.672e-01, 9.246e-02, 1.582e-01, -1.760e-02, 9.166e-04, -6.570e-02, -8.368e-02, -1.108e-01, -7.178e-03, 9.520e-03, 6.109e-02) * s0_0_1;
	r3 += M4(1.180e-01, 2.897e-02, -6.173e-02, -2.293e-02, -1.407e-01, 1.195e-03, 2.359e-01, -8.080e-01, -3.935e-01, -7.753e-02, 8.094e-02, -5.396e-03, -2.851e-01, 2.232e-01, 6.165e-02, 1.785e-01) * s0_0_1;
	r0 += M4(5.750e-02, 6.127e-03, -5.834e-02, 7.021e-02, 4.210e-01, -1.542e-02, -2.123e-01, -4.563e-01, -1.305e-01, -4.231e-02, 4.759e-01, -3.694e-01, 1.468e-01, -9.858e-03, 3.684e-01, 3.640e-01) * s0_0_2;
	r1 += M4(-1.126e-02, 1.065e-01, 4.806e-02, 3.456e-02, -2.037e-01, -1.207e-01, 2.004e-02, -2.940e-01, -1.496e-01, 3.691e-01, 1.973e-02, 1.809e-02, -7.046e-02, 6.653e-03, 2.310e-02, -8.483e-02) * s0_0_2;
	r2 += M4(3.945e-02, -1.077e-02, -7.334e-03, 2.361e-02, -1.919e-01, -8.771e-02, 4.478e-01, 4.506e-02, -4.748e-03, -8.899e-02, 1.997e-03, -8.150e-02, 7.129e-02, 2.824e-01, 2.881e-01, 2.277e-02) * s0_0_2;
	r3 += M4(-2.710e-03, -7.099e-03, -3.972e-04, -5.689e-02, 1.877e-01, -1.511e-02, 6.165e-02, 2.956e-01, 7.541e-03, -1.025e-01, -4.064e-02, 3.136e-02, 1.978e-01, -6.793e-03, 1.382e-01, -4.867e-02) * s0_0_2;
	r0 += M4(1.558e-01, -2.239e-01, 1.077e-01, -7.836e-01, 1.324e-01, 1.326e-01, -1.272e-01, -2.091e-02, 3.197e-01, 2.141e-01, 2.623e-01, 4.785e-01, 4.951e-01, -2.939e-01, 1.712e-02, -1.165e-01) * s0_1_0;
	r1 += M4(-3.784e-01, -2.566e-01, -2.216e-01, -4.562e-01, -5.767e-02, 3.039e-02, -9.004e-02, -4.145e-02, 2.843e-01, 3.971e-01, -9.044e-02, 1.749e-01, -1.694e-01, -5.349e-02, 7.758e-01, 2.388e-01) * s0_1_0;
	r2 += M4(-2.516e-01, -1.000e+00, 2.448e-01, 1.040e-01, 2.628e-01, 2.045e-01, -9.874e-02, -2.596e-02, 8.026e-02, 1.707e-01, 1.606e-01, 6.534e-02, -1.000e+00, -1.981e-01, -1.000e+00, -1.648e-01) * s0_1_0;
	r3 += M4(4.471e-03, 1.336e-03, 4.465e-01, 2.719e-01, 2.999e-01, 5.145e-02, -1.743e-01, -8.032e-02, 3.458e-01, 5.699e-01, -2.430e-01, 1.132e-04, -1.000e+00, 2.913e-01, 1.647e-02, 9.454e-02) * s0_1_0;
	r0 += M4(-2.252e-01, -4.022e-02, 1.806e-01, -3.932e-01, 1.862e-02, 4.455e-01, 3.342e-02, -5.457e-02, -2.507e-01, -1.015e-01, -3.573e-01, 3.433e-01, 4.542e-01, -7.696e-02, -3.594e-01, 1.465e-01) * s0_1_1;
	r1 += M4(-1.228e-01, 2.092e-01, 1.250e-01, 7.802e-02, 1.667e-01, -1.431e-01, 1.167e-01, 6.426e-01, -7.120e-02, 3.420e-01, 4.828e-02, -2.385e-01, -8.744e-01, 3.179e-01, -1.489e-01, 3.590e-01) * s0_1_1;
	r2 += M4(-1.041e-01, -2.627e-01, 2.336e-01, -3.861e-01, -3.618e-01, -4.741e-01, -3.998e-02, 2.587e-01, -4.049e-01, 1.137e-01, -4.502e-02, 1.497e-01, -5.628e-03, 5.838e-02, 1.169e-01, -9.863e-02) * s0_1_1;
	r3 += M4(-4.944e-02, 1.128e-01, -1.951e-02, 2.214e-01, 2.998e-02, 4.972e-01, -5.538e-01, 3.526e-01, -1.658e-01, 3.677e-02, -1.748e-02, -8.976e-02, 1.072e-01, 5.833e-03, 2.852e-01, -1.891e-01) * s0_1_1;
	r0 += M4(-4.642e-02, 2.767e-02, 3.772e-01, 8.617e-02, 4.014e-01, -3.230e-02, 4.923e-02, 1.112e-01, -9.523e-02, -1.290e-01, -1.454e-01, 3.412e-01, -1.465e-01, 1.301e-02, 7.835e-01, 3.096e-01) * s0_1_2;
	r1 += M4(-6.737e-03, 3.726e-02, -7.632e-03, 6.277e-03, 2.937e-01, 3.077e-01, -3.243e-02, -8.493e-02, -9.799e-02, -3.089e-01, 1.462e-01, 6.687e-02, 1.938e-01, -4.890e-02, 1.439e-01, 1.932e-01) * s0_1_2;
	r2 += M4(5.387e-02, 9.575e-03, -1.978e-02, 6.788e-02, 1.285e-01, 2.364e-01, -4.027e-02, -5.386e-02, -2.519e-02, 6.188e-03, 2.786e-01, 1.349e-01, 2.922e-02, 1.365e-01, 7.405e-02, -1.857e-01) * s0_1_2;
	r3 += M4(-3.895e-02, 4.996e-03, -2.277e-02, 5.234e-02, 5.322e-02, -9.001e-02, 1.105e-01, -1.871e-01, 2.613e-02, 1.162e-02, 1.103e-02, 6.554e-02, 1.201e-01, 8.321e-02, 1.060e-01, 5.812e-02) * s0_1_2;
	r0 += M4(-2.031e-01, -4.322e-01, 2.312e-01, 2.487e-01, 2.203e-01, 2.548e-01, -5.301e-02, 3.055e-03, 1.549e-01, 5.444e-02, 1.341e-01, -2.627e-01, 4.838e-01, 1.259e-01, -3.021e-01, -2.351e-01) * s0_2_0;
	r1 += M4(-1.800e-01, -1.000e+00, 3.277e-02, -3.319e-01, 1.874e-01, 8.418e-01, -4.627e-02, -7.933e-02, 2.662e-01, -2.888e-01, -5.538e-02, -6.225e-03, 2.564e-01, 1.145e-01, -1.379e-02, -4.149e-01) * s0_2_0;
	r2 += M4(1.231e-01, -5.590e-02, 2.842e-01, 2.188e-02, -3.698e-02, 9.288e-02, -1.231e-02, -4.393e-03, 8.352e-02, -1.789e-02, -2.140e-01, -1.334e-01, 2.000e-01, 2.624e-01, -2.762e-01, -1.978e-01) * s0_2_0;
	r3 += M4(-2.922e-01, 1.653e-01, -2.280e-02, 5.138e-01, 2.736e-01, 9.328e-02, -3.136e-03, -5.849e-02, 1.765e-01, -2.786e-01, 2.075e-01, 1.544e-01, -1.312e-01, -9.376e-02, 2.865e-01, 1.214e-01) * s0_2_0;
	r0 += M4(-5.483e-01, -1.384e-01, -4.879e-01, -3.983e-01, 5.085e-02, -3.941e-01, 4.571e-01, 1.771e-01, 4.248e-02, -5.615e-02, 4.713e-01, 6.294e-02, 1.065e-02, 1.313e-02, -1.528e-01, 5.019e-01) * s0_2_1;
	r1 += M4(-1.834e-01, 1.000e+00, 2.832e-01, -1.146e-02, -1.000e+00, 1.379e-01, -2.804e-02, 1.708e-01, -7.342e-02, 1.548e-01, -2.716e-02, -1.307e-02, -3.355e-03, 8.579e-02, -1.127e-02, 1.054e-02) * s0_2_1;
	r2 += M4(-3.364e-02, -1.197e-02, 6.260e-01, 9.815e-02, 2.993e-02, 1.563e-01, 1.247e-01, -8.971e-02, 1.130e-01, 1.355e-01, 7.221e-02, 9.001e-02, -4.703e-02, -7.056e-02, -6.430e-02, -9.142e-02) * s0_2_1;
	r3 += M4(1.253e-01, -1.050e-01, -3.753e-01, 1.217e-02, 2.028e-01, -2.525e-01, -2.104e-01, -2.784e-01, 7.126e-02, -5.491e-02, -3.661e-02, 2.407e-02, 1.637e-02, 7.079e-02, 4.868e-02, 3.242e-02) * s0_2_1;
	r0 += M4(-4.012e-02, -3.383e-02, 1.237e-01, 3.764e-02, 7.285e-01, 3.447e-01, -2.031e-01, 4.405e-01, -1.273e-01, -5.260e-02, -1.903e-01, -1.042e-01, -3.037e-01, 3.297e-02, 1.280e-01, -6.807e-03) * s0_2_2;
	r1 += M4(-6.005e-02, -8.560e-02, 4.607e-02, 8.663e-03, 3.442e-01, -1.000e+00, -5.544e-02, 8.985e-02, 1.646e-02, -5.283e-02, -8.416e-03, -1.332e-01, -4.051e-01, 6.048e-01, -7.615e-03, -4.128e-02) * s0_2_2;
	r2 += M4(4.435e-04, 2.037e-02, 2.207e-02, -2.727e-02, -1.266e-01, -1.058e-01, -4.737e-02, 2.301e-02, 2.583e-02, -1.624e-02, -2.252e-02, -5.011e-02, 4.314e-02, 6.372e-02, 3.632e-02, -1.273e-01) * s0_2_2;
	r3 += M4(6.610e-03, -2.237e-02, -5.361e-02, -3.571e-02, 9.108e-02, 1.808e-01, -4.817e-02, 4.280e-02, -4.142e-02, -5.548e-03, -4.962e-03, -7.805e-02, 2.205e-01, 8.538e-02, -3.954e-03, 4.076e-02) * s0_2_2;
	r0 += M4(5.573e-03, -6.796e-02, 1.487e-01, -2.941e-01, -3.036e-01, -6.266e-02, -1.847e-01, -6.251e-01, -3.286e-01, 1.085e-02, 2.602e-01, -1.456e-01, -1.000e+00, -5.199e-02, 4.911e-01, -3.864e-01) * s1_0_0;
	r1 += M4(-4.260e-01, -1.525e-01, -2.189e-02, -2.106e-01, -3.178e-02, -1.538e-01, -6.358e-03, 1.174e-01, 2.666e-02, 2.862e-01, 9.594e-02, -4.991e-02, -2.468e-02, -1.000e+00, 1.328e-01, -1.573e-01) * s1_0_0;
	r2 += M4(-6.864e-02, -1.503e-01, -3.503e-03, -8.423e-02, -1.291e-01, -6.025e-01, -4.321e-01, -1.947e-01, -8.774e-02, 1.084e-01, 6.264e-02, 4.357e-03, -1.145e-01, 5.394e-01, -1.349e-01, -5.561e-02) * s1_0_0;
	r3 += M4(-3.909e-02, -2.450e-02, -3.379e-02, 4.218e-02, 1.830e-01, -2.545e-01, -2.545e-03, 2.266e-01, 5.632e-02, -7.040e-02, 8.580e-02, 1.890e-02, 2.590e-02, -9.118e-02, 1.713e-03, -4.280e-01) * s1_0_0;
	r0 += M4(1.062e-01, -3.040e-02, -2.771e-01, 2.390e-02, 3.528e-02, -9.128e-02, -1.006e-01, 1.153e-01, 3.418e-01, 2.843e-02, 2.008e-01, -1.359e-01, -7.811e-01, 1.892e-02, -4.817e-02, -4.144e-01) * s1_0_1;
	r1 += M4(1.000e+00, 7.672e-04, 6.015e-02, 4.428e-02, -2.992e-01, 9.203e-02, 1.383e-01, -4.865e-03, 1.406e-01, 3.259e-01, -3.524e-02, -3.909e-01, -7.597e-02, 2.607e-01, 1.126e-01, 1.184e-01) * s1_0_1;
	r2 += M4(-2.685e-01, -4.986e-01, 1.154e-01, -9.870e-02, 3.650e-03, 2.348e-01, 7.504e-02, -1.582e-01, -6.933e-01, -3.953e-01, -5.029e-01, 1.295e-01, 1.797e-01, 6.427e-03, 1.857e-01, 1.829e-01) * s1_0_1;
	r3 += M4(2.030e-01, 1.604e-01, 2.861e-01, 4.106e-01, 2.863e-02, 2.054e-01, 1.080e-01, -1.785e-01, -4.194e-02, -3.109e-01, -1.395e-01, -1.967e-01, 1.261e-01, -9.497e-02, -2.329e-01, 3.445e-02) * s1_0_1;
	r0 += M4(3.111e-01, -1.666e-02, 3.835e-01, -1.000e+00, 2.475e-01, 2.605e-02, -4.411e-03, -2.248e-01, -3.058e-01, -9.799e-02, -7.026e-01, 4.919e-01, -4.212e-01, 8.434e-02, 3.014e-01, 1.264e-01) * s1_0_2;
	r1 += M4(-1.000e+00, -1.292e-01, 1.532e-03, -6.841e-01, -2.823e-01, -1.408e-02, -1.270e-01, -4.012e-02, -3.967e-01, 1.414e-01, 2.361e-02, -8.837e-02, 1.289e-01, 7.209e-03, 1.454e-01, 5.410e-01) * s1_0_2;
	r2 += M4(4.796e-01, -1.113e-01, 3.841e-01, 1.867e-01, -1.571e-01, -1.973e-01, -1.977e-01, -1.615e-01, 2.822e-01, 2.132e-01, 2.349e-01, 4.324e-02, -5.929e-02, -5.586e-04, -2.492e-01, -1.765e-01) * s1_0_2;
	r3 += M4(6.057e-02, 2.407e-01, 2.088e-01, 5.848e-01, -1.153e-01, -8.765e-02, 8.575e-02, -2.475e-02, 1.586e-01, 4.909e-02, 2.231e-01, -6.667e-02, -1.173e-02, 1.479e-01, -1.508e-01, -7.359e-02) * s1_0_2;
	r0 += M4(5.496e-01, -1.078e-02, 7.133e-02, 6.855e-02, 6.200e-02, -3.047e-01, 1.239e-01, 9.424e-02, 8.507e-02, 6.862e-02, 3.101e-02, -8.228e-02, -3.676e-01, 4.615e-01, 3.154e-01, 6.995e-01) * s1_1_0;
	r1 += M4(7.740e-01, 1.762e-01, 1.390e-01, -1.001e-01, 8.321e-01, 1.815e-01, -1.077e-01, -2.207e-01, 2.297e-01, -5.027e-01, -9.764e-02, 1.705e-01, -4.897e-02, 7.559e-01, -9.980e-01, -1.000e+00) * s1_1_0;
	r2 += M4(-5.088e-01, 3.349e-02, 7.645e-02, 6.474e-02, 5.404e-01, 7.468e-02, 2.902e-01, -4.406e-02, 1.384e-01, 3.583e-02, 1.243e-02, -2.037e-02, -5.494e-01, 4.380e-01, 1.567e-01, 1.356e-02) * s1_1_0;
	r3 += M4(1.691e-01, -9.047e-02, -3.360e-02, -3.942e-01, -2.522e-01, -7.975e-01, 1.966e-01, -2.507e-01, 2.283e-01, -2.790e-02, -1.280e-02, 2.917e-02, -8.158e-01, 2.393e-01, 6.091e-02, 4.829e-01) * s1_1_0;
	r0 += M4(3.804e-01, -5.426e-02, -7.598e-01, 4.543e-01, 3.442e-01, 1.050e-01, 1.143e-01, 6.161e-01, -1.534e-01, -9.033e-02, -3.916e-01, -5.802e-01, -3.236e-01, -3.297e-01, -1.000e+00, -1.000e+00) * s1_1_1;
	r1 += M4(8.070e-02, -4.037e-01, -7.499e-02, 6.905e-01, -1.869e-01, -2.743e-01, -1.018e-01, 1.867e-02, 7.886e-01, -1.501e-01, 3.637e-01, 3.393e-01, -4.272e-01, 9.351e-02, -4.166e-01, -1.000e+00) * s1_1_1;
	r2 += M4(1.700e-01, 4.375e-02, -8.502e-01, -5.264e-03, -2.636e-01, 5.457e-01, 2.025e-01, 5.158e-01, 8.848e-01, -1.969e-01, 4.302e-01, 2.857e-01, 2.947e-01, 5.095e-01, 5.740e-01, 5.553e-01) * s1_1_1;
	r3 += M4(1.000e+00, 9.881e-03, 9.505e-01, 4.242e-01, 1.732e-01, 6.202e-01, 1.166e-01, 2.775e-01, 1.013e-01, 1.079e-01, -3.566e-01, -1.191e-01, -3.576e-02, 1.852e-01, 1.888e-01, -1.824e-01) * s1_1_1;
	r0 += M4(-6.568e-01, -4.685e-01, 1.000e+00, -2.554e-01, -2.145e-01, -9.195e-02, 6.386e-02, 1.409e-01, -2.976e-01, 3.287e-01, 1.141e-01, -2.460e-01, 3.276e-01, -2.374e-01, -8.692e-01, 8.718e-02) * s1_1_2;
	r1 += M4(-1.000e+00, 1.000e+00, -3.493e-01, 9.018e-02, 2.025e-01, 3.637e-01, 3.697e-02, 1.734e-02, 2.503e-02, -9.792e-02, 3.874e-01, 3.669e-01, -2.690e-01, -1.218e-01, -1.927e-01, -3.484e-01) * s1_1_2;
	r2 += M4(1.000e+00, 7.111e-01, 5.208e-01, -1.345e-01, 5.723e-02, 2.516e-01, 2.207e-01, 1.718e-01, -5.744e-01, -1.732e-02, -3.751e-02, -4.776e-01, 2.003e-01, 6.436e-02, 1.094e-01, 4.736e-01) * s1_1_2;
	r3 += M4(1.000e+00, -7.493e-01, 6.782e-01, -9.094e-02, -2.374e-01, -6.079e-02, 7.481e-02, 7.455e-02, 5.177e-01, 6.842e-02, -1.161e-01, 1.892e-01, -2.574e-02, -9.005e-02, 3.396e-01, -1.378e-01) * s1_1_2;
	r0 += M4(-2.557e-01, 3.164e-02, 2.679e-01, 1.510e-01, 1.656e-01, 4.036e-01, -4.631e-01, -3.667e-01, 1.295e-01, -3.781e-02, -1.073e-01, 3.171e-01, 3.987e-01, -4.574e-01, 2.848e-01, 1.360e-01) * s1_2_0;
	r1 += M4(-4.225e-01, -2.935e-01, 7.407e-02, -1.577e-01, 8.047e-02, -1.349e-01, 2.578e-01, 3.499e-01, -9.221e-02, 1.889e-01, -8.409e-02, -1.834e-01, -1.231e-01, -2.656e-01, -2.075e-01, -3.260e-01) * s1_2_0;
	r2 += M4(4.509e-02, -1.039e-02, -2.553e-01, 4.692e-02, 1.113e-01, -3.814e-01, 2.306e-02, -7.759e-03, 1.250e-01, 9.443e-02, 6.118e-02, 4.893e-02, -2.743e-02, 2.217e-01, 1.416e-02, -2.113e-01) * s1_2_0;
	r3 += M4(-3.250e-01, -2.821e-01, 2.295e-02, -2.893e-01, 2.534e-01, -1.127e-01, -6.785e-02, 7.658e-02, 1.644e-04, -1.964e-02, 7.112e-02, -4.343e-02, -1.516e-01, 1.406e-02, 2.094e-01, 6.469e-02) * s1_2_0;
	r0 += M4(-5.336e-01, 9.804e-02, -5.423e-01, 1.115e-01, -2.424e-01, 7.556e-02, -2.781e-02, 2.025e-01, -4.930e-01, 3.449e-01, 1.792e-01, -4.909e-02, 3.154e-01, 3.647e-01, 2.987e-01, 2.996e-01) * s1_2_1;
	r1 += M4(4.770e-01, -1.577e-01, -4.512e-02, 2.263e-01, -5.164e-02, 1.309e-01, 1.435e-02, -7.216e-02, -1.094e-01, -2.306e-01, 8.838e-02, 2.520e-01, -1.547e-01, -3.045e-01, -1.846e-02, -1.712e-01) * s1_2_1;
	r2 += M4(-2.374e-01, 3.254e-01, -1.492e-01, 1.948e-02, 4.339e-01, 1.820e-01, 8.589e-02, -1.176e-01, -8.095e-02, -1.487e-01, 5.920e-02, 2.077e-02, -2.405e-01, -1.262e-01, 3.006e-01, -1.289e-01) * s1_2_1;
	r3 += M4(2.867e-01, -2.648e-01, 3.255e-01, 3.557e-01, 1.196e-01, 2.221e-01, -5.960e-02, -1.698e-02, 5.552e-02, 3.255e-02, -2.727e-01, -9.418e-02, 1.871e-01, 1.690e-01, -4.098e-02, 2.930e-02) * s1_2_1;
	r0 += M4(1.423e-01, 4.329e-01, 1.967e-01, -5.943e-04, 9.194e-03, -6.400e-02, 4.495e-01, 1.294e-01, -1.224e-01, -3.693e-01, -3.036e-01, -8.125e-02, 5.186e-01, 2.220e-01, 5.050e-01, -1.467e-01) * s1_2_2;
	r1 += M4(-8.930e-01, -3.987e-01, -8.134e-02, -5.183e-02, -1.830e-01, 7.196e-02, -5.511e-02, -1.261e-01, -2.783e-01, 1.979e-01, -2.568e-01, -1.044e-01, 5.985e-02, -4.238e-02, 1.213e-01, 2.389e-01) * s1_2_2;
	r2 += M4(1.233e-01, 2.255e-01, -4.837e-01, 2.433e-03, -4.961e-02, 5.376e-03, -9.788e-02, -1.342e-01, 3.014e-01, 1.112e-01, -2.675e-01, -4.932e-03, -2.687e-02, -1.770e-01, -6.369e-02, -6.848e-03) * s1_2_2;
	r3 += M4(4.229e-01, -2.798e-01, 8.815e-01, -1.000e+00, -3.396e-02, 2.486e-02, -1.274e-01, -1.261e-01, 4.108e-02, 4.653e-02, 1.315e-01, -1.185e-01, 6.024e-02, 1.138e-01, -2.458e-01, 2.330e-01) * s1_2_2;
	s0_0_0 = G[2][xy.y+0][xy.x+0]; s0_0_1 = G[2][xy.y+0][xy.x+1];
	s0_0_2 = G[2][xy.y+0][xy.x+2]; s0_1_0 = G[2][xy.y+1][xy.x+0];
	s0_1_1 = G[2][xy.y+1][xy.x+1]; s0_1_2 = G[2][xy.y+1][xy.x+2];
	s0_2_0 = G[2][xy.y+2][xy.x+0]; s0_2_1 = G[2][xy.y+2][xy.x+1];
	s0_2_2 = G[2][xy.y+2][xy.x+2]; s1_0_0 = G[3][xy.y+0][xy.x+0];
	s1_0_1 = G[3][xy.y+0][xy.x+1]; s1_0_2 = G[3][xy.y+0][xy.x+2];
	s1_1_0 = G[3][xy.y+1][xy.x+0]; s1_1_1 = G[3][xy.y+1][xy.x+1];
	s1_1_2 = G[3][xy.y+1][xy.x+2]; s1_2_0 = G[3][xy.y+2][xy.x+0];
	s1_2_1 = G[3][xy.y+2][xy.x+1]; s1_2_2 = G[3][xy.y+2][xy.x+2];
	r0 += M4(-8.757e-02, -1.707e-02, -1.127e-03, 3.610e-02, 7.040e-01, -1.096e-02, 6.966e-03, 1.286e-02, 5.498e-02, 3.180e-02, 7.244e-02, -1.583e-02, 7.285e-01, 4.310e-02, -5.750e-01, 2.410e-02) * s0_0_0;
	r1 += M4(3.941e-02, -2.055e-01, -5.073e-02, 4.083e-02, 5.304e-02, 3.293e-01, -2.440e-02, -2.571e-01, 5.312e-02, 3.123e-01, -6.616e-02, 1.072e-01, 2.902e-02, 8.589e-01, -1.558e-01, 1.104e-01) * s0_0_0;
	r2 += M4(-4.395e-03, -2.632e-02, -8.306e-02, -4.996e-02, 6.406e-02, -1.359e-01, -8.807e-02, 1.957e-02, -1.234e-01, -2.208e-02, 5.370e-03, -6.730e-03, 1.584e-01, -3.898e-01, 1.417e-01, 7.546e-02) * s0_0_0;
	r3 += M4(-1.422e-01, -6.739e-02, 2.451e-02, 3.704e-02, -7.268e-02, -2.414e-02, 1.527e-01, -5.473e-02, 1.139e-02, -6.302e-02, 3.832e-02, -8.230e-02, -1.226e-02, 7.820e-02, 4.841e-02, 2.157e-01) * s0_0_0;
	r0 += M4(-3.448e-01, -3.342e-02, -2.118e-01, 1.169e-01, 2.922e-02, 1.347e-01, 1.680e-01, -5.564e-01, -3.284e-01, -1.086e-01, -1.518e-01, -1.240e-01, 8.330e-01, 6.867e-02, -3.864e-01, 4.195e-01) * s0_0_1;
	r1 += M4(-2.501e-02, -1.317e-01, -1.635e-01, 1.661e-01, 2.529e-01, -5.470e-01, 4.919e-03, 4.294e-02, -3.569e-01, -1.000e+00, 4.716e-02, -1.419e-01, 2.698e-01, -1.932e-01, -1.330e-01, -2.675e-02) * s0_0_1;
	r2 += M4(-1.089e-02, 1.563e-02, -7.992e-02, -6.635e-02, 9.725e-02, 1.887e-01, 1.522e-02, 1.253e-02, -5.861e-02, 5.130e-02, 1.304e-01, -1.162e-01, -6.803e-02, 1.830e-01, -1.153e-03, -1.217e-01) * s0_0_1;
	r3 += M4(-5.432e-02, -9.789e-02, 1.591e-01, -1.242e-01, -1.527e-01, 5.864e-02, -2.157e-01, 7.239e-01, 6.109e-02, -1.491e-01, 2.015e-01, 4.337e-02, 1.156e-01, -1.326e-02, 6.881e-02, -2.903e-01) * s0_0_1;
	r0 += M4(-3.853e-01, -3.987e-02, 3.900e-01, 3.366e-01, -2.534e-01, -3.949e-02, -2.599e-02, -8.448e-02, 3.343e-01, -4.959e-03, 5.300e-02, 3.094e-02, 4.326e-01, -8.252e-02, -4.678e-01, -1.840e-01) * s0_0_2;
	r1 += M4(-1.329e-01, 3.650e-01, 6.128e-04, -5.123e-02, 1.026e-01, 2.859e-01, -3.346e-02, 8.971e-02, 2.058e-01, 3.027e-01, -1.044e-01, 8.955e-02, -1.129e-01, -1.840e-01, -1.252e-01, -6.463e-01) * s0_0_2;
	r2 += M4(3.202e-02, -1.916e-02, -1.928e-01, 1.096e-02, 1.087e-01, 8.077e-02, -7.165e-02, 1.316e-02, -5.888e-02, 2.172e-01, -2.633e-01, -3.368e-02, 2.506e-02, 5.620e-02, 4.153e-01, 2.030e-01) * s0_0_2;
	r3 += M4(-1.278e-01, 2.370e-02, 9.145e-02, -2.183e-01, -4.067e-02, 1.072e-01, 1.822e-01, -4.899e-02, 7.522e-03, -1.978e-01, 8.270e-02, -1.894e-02, 6.143e-02, -1.468e-01, 8.451e-02, 1.093e-01) * s0_0_2;
	r0 += M4(-5.023e-01, -9.320e-02, -1.509e-01, 2.646e-01, 9.826e-02, -5.933e-02, -9.601e-02, 4.215e-01, 2.025e-01, 5.375e-03, -1.300e-01, 5.854e-02, 3.468e-01, -4.070e-01, -5.584e-01, -6.435e-01) * s0_1_0;
	r1 += M4(9.867e-02, 3.153e-01, 5.387e-02, 1.150e-02, -2.638e-01, -1.450e-01, -2.091e-01, -1.334e-01, -2.052e-01, -2.036e-01, 6.828e-02, -2.407e-02, -2.937e-01, -4.662e-01, 9.192e-01, 1.000e+00) * s0_1_0;
	r2 += M4(1.924e-01, -4.856e-02, -1.003e-01, 4.793e-03, -1.482e-01, -2.203e-01, -3.150e-01, -1.002e-01, -2.871e-02, 4.239e-03, -1.771e-01, -3.449e-02, 5.879e-01, -2.241e-01, -3.368e-02, -7.748e-02) * s0_1_0;
	r3 += M4(-3.230e-02, 1.832e-02, 9.377e-02, -3.333e-02, -2.600e-01, -3.332e-01, 2.740e-01, -1.028e-01, -1.043e-01, -1.262e-01, -1.248e-01, 5.956e-03, 8.112e-01, -2.328e-01, -1.209e-01, -4.794e-01) * s0_1_0;
	r0 += M4(1.883e-01, 1.382e-03, -1.701e-01, -5.298e-01, -5.069e-02, -1.119e-01, -1.127e-01, 4.751e-01, 1.543e-01, 1.040e-01, -1.266e-01, 6.815e-01, 4.345e-01, 1.187e-01, 6.213e-01, 4.364e-01) * s0_1_1;
	r1 += M4(1.805e-01, 5.267e-01, 3.569e-01, 4.157e-02, 1.000e+00, -2.253e-02, 1.809e-01, -2.799e-02, 7.710e-02, -2.416e-01, -6.343e-02, -1.000e+00, 3.525e-01, -4.107e-02, 4.365e-01, 1.000e+00) * s0_1_1;
	r2 += M4(3.633e-01, -1.057e-01, -2.325e-02, -6.599e-02, -7.854e-02, 4.998e-02, 4.988e-01, 1.007e-02, 2.504e-02, 3.801e-02, 2.859e-02, -1.144e-01, -3.037e-01, -7.136e-01, -6.504e-01, 1.489e-01) * s0_1_1;
	r3 += M4(-6.112e-01, 1.568e-01, -2.821e-01, 3.591e-01, 8.198e-02, 1.777e-01, 4.388e-01, -2.797e-01, 2.492e-01, -3.691e-01, 2.937e-01, 1.612e-02, -5.374e-01, -1.607e-01, 1.826e-01, 1.764e-01) * s0_1_1;
	r0 += M4(-2.271e-01, -3.234e-02, -3.311e-01, 1.308e-01, 2.112e-01, 5.644e-02, 5.599e-02, -2.772e-02, 7.440e-02, 2.861e-01, 1.246e-01, 3.814e-01, -4.004e-01, 2.764e-01, 7.396e-01, 1.027e-01) * s0_1_2;
	r1 += M4(-4.395e-01, -1.000e+00, -1.818e-01, -1.413e-01, 2.646e-01, 4.204e-01, 1.331e-01, 1.331e-01, 1.000e+00, -1.000e+00, -8.869e-02, 1.040e-01, 2.959e-01, -6.108e-02, 1.754e-01, 3.430e-01) * s0_1_2;
	r2 += M4(-9.434e-01, -2.454e-01, -5.140e-01, 4.316e-02, 1.646e-01, 1.139e-01, 1.323e-01, 2.797e-01, -1.000e+00, -5.235e-02, -2.204e-01, 1.206e-01, -2.407e-01, -1.748e-01, -1.985e-01, -5.258e-01) * s0_1_2;
	r3 += M4(-3.541e-01, 2.212e-01, 3.643e-01, 3.147e-01, 6.931e-02, 7.134e-02, -2.067e-02, -4.779e-02, -3.701e-01, -5.368e-02, 1.514e-01, 5.055e-03, 1.417e-01, 1.146e-01, -3.186e-01, 1.548e-01) * s0_1_2;
	r0 += M4(-2.692e-01, 4.942e-02, -5.303e-02, 9.643e-02, 1.106e-01, 1.056e-01, -4.119e-02, -2.289e-02, 1.386e-01, -1.057e-01, -4.513e-03, -2.512e-02, -1.762e-01, 3.857e-01, -2.810e-01, -4.058e-02) * s0_2_0;
	r1 += M4(1.047e-02, -1.617e-01, 1.910e-02, -9.737e-03, -2.618e-01, 1.310e-01, 3.343e-02, -2.249e-01, -1.312e-01, -1.504e-01, 3.108e-03, 3.339e-02, 2.239e-01, 1.322e-02, 1.753e-01, 2.940e-01) * s0_2_0;
	r2 += M4(7.650e-02, -4.797e-02, -1.144e-02, 8.444e-03, 1.368e-01, -8.455e-02, -1.639e-02, -3.551e-02, -1.023e-01, -6.326e-03, 7.231e-02, -5.479e-02, 1.377e-02, -2.569e-01, 1.528e-01, 2.393e-01) * s0_2_0;
	r3 += M4(-1.920e-02, -4.086e-02, 6.021e-02, -6.175e-02, -2.848e-01, -5.001e-03, 1.229e-02, 1.130e-01, -8.253e-02, 1.667e-02, -1.392e-01, 1.265e-01, 2.871e-01, -2.662e-02, -1.476e-01, -2.746e-02) * s0_2_0;
	r0 += M4(-1.643e-01, -1.013e-01, 5.050e-03, -1.924e-01, 7.327e-02, -4.569e-02, -1.328e-01, -1.461e-01, 1.606e-02, 4.793e-02, 5.514e-02, -1.135e-01, -3.705e-01, -3.322e-01, -6.662e-01, -3.158e-01) * s0_2_1;
	r1 += M4(-1.000e+00, -6.447e-02, -2.320e-02, 3.267e-01, -2.833e-01, 3.576e-02, -2.093e-02, 9.449e-02, -3.095e-02, 3.512e-01, -4.549e-02, 2.217e-02, 4.467e-01, 2.910e-01, 1.349e-01, 1.373e-01) * s0_2_1;
	r2 += M4(-1.430e-01, 8.506e-02, -2.871e-01, 3.535e-02, 2.785e-02, -1.110e-01, -6.678e-02, -3.146e-02, 2.654e-01, -7.405e-02, 6.316e-04, -4.476e-02, 2.332e-01, 1.624e-01, -1.260e-01, 2.399e-01) * s0_2_1;
	r3 += M4(-5.385e-02, -3.297e-02, 9.295e-02, -2.138e-01, 1.389e-01, -1.130e-01, 1.148e-01, -1.866e-01, 1.100e-01, -5.245e-02, -7.713e-02, 3.597e-03, 3.782e-02, -1.475e-01, -1.234e-01, -3.567e-02) * s0_2_1;
	r0 += M4(-9.597e-02, 5.823e-02, -1.250e-01, 5.953e-01, -6.142e-02, 1.874e-02, 5.500e-02, -2.106e-03, -1.977e-01, -2.077e-01, 9.602e-03, 1.738e-01, -6.015e-01, -2.466e-01, -8.577e-01, 1.359e-01) * s0_2_2;
	r1 += M4(1.948e-01, 6.668e-02, -3.107e-01, -2.423e-02, -4.043e-02, -1.821e-01, -3.600e-02, 7.069e-02, 7.719e-02, 2.585e-01, 1.467e-01, 1.441e-01, -3.871e-02, 1.597e-01, -9.869e-02, -2.173e-01) * s0_2_2;
	r2 += M4(-1.510e-02, 9.796e-02, -6.873e-01, -7.372e-02, 3.729e-02, -3.835e-02, -1.455e-01, -4.822e-02, 2.911e-02, -5.249e-03, 2.549e-01, -8.153e-03, 8.067e-02, 2.933e-01, 1.581e-01, -9.208e-03) * s0_2_2;
	r3 += M4(-1.086e-01, 4.805e-02, 3.686e-01, 4.144e-02, -2.181e-02, -6.884e-02, 2.511e-01, 5.430e-02, 4.733e-02, -2.063e-01, 1.444e-02, 1.131e-01, -6.401e-02, -8.366e-02, 2.235e-01, -2.667e-01) * s0_2_2;
	r0 += M4(5.148e-02, 1.702e-02, 8.913e-02, 1.672e-01, -5.790e-01, -4.135e-04, 1.340e-01, -2.339e-02, 5.526e-01, 8.114e-02, -1.326e-01, -2.195e-01, -5.946e-02, -4.055e-02, -3.659e-01, 3.091e-01) * s1_0_0;
	r1 += M4(4.300e-02, 1.358e-01, -7.210e-03, -6.509e-02, -5.229e-02, -4.957e-01, -2.034e-01, 9.946e-03, 1.966e-01, 3.943e-01, 2.151e-01, 4.142e-01, -1.696e-02, -3.464e-01, -5.222e-02, -4.562e-03) * s1_0_0;
	r2 += M4(2.405e-02, 1.499e-02, 2.619e-02, 4.899e-02, 2.146e-01, 8.421e-02, -3.582e-01, -1.624e-02, 1.388e-01, -2.317e-01, 2.117e-01, 8.762e-02, -1.978e-01, -6.175e-02, -8.844e-02, 2.975e-02) * s1_0_0;
	r3 += M4(-1.401e-02, 7.832e-03, -4.241e-02, -3.347e-02, 3.700e-01, -1.313e-02, 2.116e-01, -2.770e-02, 2.638e-01, 2.372e-01, -1.805e-01, 2.013e-01, -2.017e-01, -1.175e-01, -1.573e-02, -7.632e-02) * s1_0_0;
	r0 += M4(3.736e-01, 3.338e-02, 1.150e-01, -1.935e-01, 4.230e-02, 1.971e-02, -1.383e-01, 5.971e-01, -2.075e-01, -2.893e-02, 9.644e-04, -1.914e-01, -3.507e-01, -3.645e-02, 4.273e-02, -5.778e-02) * s1_0_1;
	r1 += M4(4.058e-02, 3.668e-01, 7.153e-02, -5.892e-02, 4.002e-02, -1.289e-01, 6.772e-02, -2.383e-02, 2.481e-01, -6.738e-01, 4.948e-02, -4.079e-01, -1.989e-01, 1.554e-01, 4.842e-03, 9.854e-02) * s1_0_1;
	r2 += M4(9.723e-02, -1.010e-02, 1.614e-01, 1.005e-03, -1.079e-01, 5.926e-01, -1.847e-01, 1.665e-01, -3.427e-01, -3.921e-01, 2.022e-01, -1.797e-01, 1.625e-01, 1.162e-01, 4.342e-02, -6.776e-02) * s1_0_1;
	r3 += M4(2.300e-01, 8.606e-02, 5.323e-02, 1.942e-01, 1.721e-01, -1.647e-01, -4.087e-01, -1.000e+00, -8.833e-02, -3.724e-03, 1.604e-02, 2.881e-01, -7.018e-02, 1.000e-01, 1.129e-02, 9.875e-02) * s1_0_1;
	r0 += M4(4.157e-01, 3.778e-02, -3.791e-02, -2.417e-01, -2.745e-01, 3.640e-02, -5.644e-01, -1.628e-01, -1.374e-01, 4.106e-02, -1.507e-01, -3.088e-01, -9.151e-02, -5.834e-03, 2.579e-01, 7.319e-02) * s1_0_2;
	r1 += M4(6.296e-02, 2.386e-01, -7.730e-02, -9.545e-02, -9.404e-02, -2.275e-02, -1.449e-02, 8.530e-02, 4.725e-02, 1.018e-01, -3.274e-02, 1.522e-01, 4.182e-02, -2.928e-01, 3.992e-02, 2.601e-02) * s1_0_2;
	r2 += M4(-6.923e-02, -1.498e-01, 3.236e-02, 3.171e-02, -2.393e-02, -2.146e-01, -1.207e-01, 6.297e-02, -1.647e-01, -3.096e-01, -2.682e-01, -2.977e-02, 8.602e-02, 1.636e-01, 2.578e-02, -7.527e-03) * s1_0_2;
	r3 += M4(-8.422e-02, -4.047e-02, 1.021e-01, 5.113e-02, 1.066e-01, 1.935e-02, 1.200e-02, -1.327e-01, -2.491e-01, 2.555e-02, -1.616e-01, 4.014e-02, 2.258e-02, -2.789e-02, 5.481e-02, -5.283e-02) * s1_0_2;
	r0 += M4(5.928e-01, 5.140e-02, -3.788e-02, -4.334e-02, -5.446e-01, 1.834e-01, 3.273e-02, -5.521e-01, -2.497e-01, -1.164e-01, 2.751e-01, -1.533e-02, -3.711e-01, 5.112e-02, -4.015e-01, -6.131e-01) * s1_1_0;
	r1 += M4(-3.389e-02, 1.687e-01, 1.802e-01, -4.038e-02, 1.259e-01, -2.327e-02, 1.366e-01, 2.977e-01, 3.785e-02, -1.206e-01, 5.558e-01, 8.483e-01, 4.104e-01, -3.984e-01, 2.756e-01, -1.812e-01) * s1_1_0;
	r2 += M4(-1.167e-01, -1.637e-01, 2.824e-01, 2.266e-02, -2.476e-01, 9.776e-02, 2.512e-01, 2.217e-02, -6.919e-02, -1.344e-01, -1.961e-01, -7.514e-02, -4.680e-01, -1.859e-01, 9.357e-02, 2.287e-01) * s1_1_0;
	r3 += M4(6.029e-02, 1.082e-01, -1.001e-01, 1.519e-01, 3.882e-01, 6.361e-02, -1.041e-01, 1.405e-02, 5.332e-01, 2.040e-01, 2.312e-02, -1.017e-01, -2.465e-01, 8.600e-02, 6.305e-02, -2.206e-02) * s1_1_0;
	r0 += M4(-2.978e-01, -1.460e-01, 1.882e-01, 5.172e-01, 2.993e-02, 5.152e-01, -3.878e-01, -4.898e-01, -7.440e-01, 1.369e-02, 4.178e-01, 4.532e-01, 2.090e-01, -8.102e-02, -4.971e-01, -1.004e-01) * s1_1_1;
	r1 += M4(-1.000e+00, -3.135e-01, -1.815e-01, 1.882e-01, -9.128e-03, 2.982e-01, -1.763e-01, 3.181e-02, -6.279e-03, -1.487e-01, 4.914e-02, -3.342e-01, -1.000e+00, 6.713e-01, -1.137e-01, -9.876e-02) * s1_1_1;
	r2 += M4(2.438e-01, -3.904e-01, 9.402e-02, -2.563e-01, 1.020e-01, 2.948e-02, -1.905e-01, 1.894e-01, 4.520e-01, 3.760e-01, 1.135e-01, -4.149e-01, 1.900e-01, 2.389e-01, -6.217e-01, -9.017e-02) * s1_1_1;
	r3 += M4(2.040e-01, -2.922e-02, 1.886e-01, 5.070e-01, 1.969e-01, 8.594e-02, -5.142e-01, -2.667e-01, 2.652e-01, -7.656e-02, -4.826e-01, -1.450e-01, -4.685e-01, -1.646e-01, -1.231e-01, -9.348e-02) * s1_1_1;
	r0 += M4(-5.424e-02, -4.271e-02, -1.823e-01, 8.489e-03, 8.469e-02, -6.043e-03, 4.870e-01, -2.792e-01, 2.213e-01, -8.692e-02, -9.043e-01, -6.166e-01, -7.085e-02, 1.934e-02, 7.025e-01, 4.421e-01) * s1_1_2;
	r1 += M4(-5.809e-02, 1.000e+00, -2.833e-02, 1.541e-01, -1.824e-01, 3.115e-02, -3.305e-02, -1.543e-01, -2.083e-01, 1.725e-02, -1.775e-01, -2.806e-01, -1.093e-01, -6.640e-01, -3.206e-02, 2.416e-02) * s1_1_2;
	r2 += M4(2.250e-01, -1.704e-01, 9.242e-02, -5.629e-02, -1.006e-02, -3.068e-02, -3.380e-02, 2.765e-01, 3.830e-02, -4.342e-03, -1.003e-01, 1.597e-01, 1.204e-01, 1.978e-01, -5.342e-02, -2.212e-02) * s1_1_2;
	r3 += M4(2.290e-01, 3.641e-02, 4.172e-02, -2.725e-01, -7.737e-02, -9.031e-02, 3.461e-02, -1.261e-01, -2.362e-01, -1.582e-01, -1.148e-01, -1.139e-01, 2.113e-01, 2.105e-02, 1.596e-01, -5.593e-02) * s1_1_2;
	r0 += M4(5.922e-01, -3.756e-02, 1.364e-01, -2.687e-01, -1.944e-01, 2.748e-02, 1.623e-01, 1.587e-02, -3.007e-01, -1.285e-02, -3.185e-02, -1.549e-01, -9.008e-02, -3.239e-01, -2.483e-01, -1.897e-01) * s1_2_0;
	r1 += M4(5.162e-02, 1.341e-01, 3.649e-02, 3.387e-02, -1.739e-03, -4.711e-02, 3.700e-03, 5.356e-02, -1.256e-01, 3.126e-01, -1.967e-01, 1.394e-02, -2.500e-02, -1.637e-01, 8.629e-02, -3.205e-01) * s1_2_0;
	r2 += M4(1.445e-02, -6.305e-02, -9.545e-02, 4.414e-02, 3.651e-02, 3.236e-02, 2.090e-02, 2.654e-02, -5.558e-02, 1.794e-03, -1.480e-01, 9.065e-02, 1.002e-01, -1.736e-01, -3.580e-01, -2.004e-02) * s1_2_0;
	r3 += M4(3.245e-02, -2.892e-02, 1.242e-02, -1.919e-02, 1.204e-02, 3.025e-02, -1.250e-03, 8.896e-02, 4.894e-03, -3.561e-02, 1.873e-01, -5.677e-02, -2.315e-01, -2.109e-02, 2.322e-01, 2.621e-02) * s1_2_0;
	r0 += M4(2.860e-01, 1.214e-01, -2.403e-01, 1.776e-01, 7.349e-02, 5.832e-02, -8.575e-02, 2.396e-01, -1.292e-01, 1.476e-02, 2.792e-01, -2.782e-01, -6.745e-02, 9.626e-01, 4.476e-02, 6.400e-01) * s1_2_1;
	r1 += M4(-2.014e-02, -3.407e-01, -4.667e-02, -5.064e-01, 1.207e-01, 1.928e-01, 5.646e-02, -7.194e-02, -4.316e-02, 2.350e-01, -5.836e-02, -9.266e-02, 5.364e-02, 1.587e-01, -2.686e-01, 1.588e-01) * s1_2_1;
	r2 += M4(-2.367e-01, -1.786e-02, 2.786e-02, -1.480e-01, 7.058e-03, 5.462e-02, 1.948e-01, -3.478e-02, 7.032e-02, 2.295e-01, -2.538e-01, -9.418e-02, -3.589e-01, 3.305e-02, -6.012e-01, 1.616e-01) * s1_2_1;
	r3 += M4(-3.913e-01, -8.969e-03, 1.265e-01, 1.728e-02, -6.356e-02, 4.006e-02, -1.233e-01, -5.702e-02, -1.504e-02, -9.157e-03, 2.588e-01, -4.238e-02, 1.752e-01, -2.313e-02, 3.761e-01, 2.485e-01) * s1_2_1;
	r0 += M4(5.684e-01, 2.174e-02, -1.117e-01, -2.358e-01, 2.463e-02, -2.332e-02, -9.245e-02, -3.418e-02, 3.120e-01, -5.608e-03, -1.188e-01, 2.994e-01, -4.381e-01, -2.358e-01, 7.414e-01, 2.418e-01) * s1_2_2;
	r1 += M4(-1.153e-02, -2.446e-01, 1.294e-01, 2.454e-01, 2.570e-02, -5.144e-03, 8.531e-02, -5.032e-03, 4.432e-01, -5.516e-01, 2.628e-02, 2.725e-02, -1.008e-01, -6.999e-03, -1.695e-01, 9.292e-02) * s1_2_2;
	r2 += M4(-9.184e-01, -7.978e-02, 4.347e-01, 1.842e-01, -4.881e-02, -5.257e-02, 1.786e-02, 1.828e-02, -6.646e-02, -1.386e-01, 2.877e-03, 1.116e-01, 4.710e-01, 1.248e-01, -2.331e-01, -1.008e-01) * s1_2_2;
	r3 += M4(-4.652e-02, -7.791e-03, -2.957e-01, 3.785e-01, 4.967e-02, 2.023e-02, -6.874e-02, 5.005e-02, -1.567e-01, -5.259e-02, 1.556e-02, 2.684e-02, -3.361e-02, -1.249e-02, 4.660e-01, -9.838e-02) * s1_2_2;
	r0 += V4(7.821e-02, -1.181e-02, 2.486e-02, 5.901e-02);
	r0 = clamp(r0, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 += V4(1.066e-02, 1.157e-02, 1.048e-02, 1.067e-02);
	r1 = clamp(r1, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 += V4(-6.791e-02, 2.061e-02, 2.751e-02, 2.634e-02);
	r2 = clamp(r2, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 1), vec4(r2));
	r3 += V4(-1.610e-02, 7.276e-02, 3.187e-02, 5.688e-03);
	r3 = clamp(r3, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 1), vec4(r3));
}

//!DESC [CuNNy_4x16_DS] -conv2
//!HOOK LUMA
//!COMPUTE 16 16 8 8
//!BIND conv1
//!BIND LUMA
//!SAVE conv2
//!WIDTH LUMA.w 2 *
//!HEIGHT LUMA.h 2 *
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Precision selection for this pass: request the explicit float16 arithmetic
// types extension and, when its macro is defined, alias the working types to
// half precision. Otherwise fall back to the standard 32-bit float types so
// the same body compiles either way.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// Workgroup-shared input tile: 4 planes, each 10x10 entries of V4.
// hook() fills it from conv1 before the barrier and then reads it at
// [xy.y + 0..2][xy.x + 0..2], i.e. a 3x3 neighbourhood per invocation.
// The 10 presumably corresponds to the 8x8 block of positions per workgroup
// (pos = gl_WorkGroupID * 8 + xy) plus a one-texel border on each side.
shared V4 G[4][10][10];
void hook() {
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	ivec2 opos = pos * ivec2(2, 2);
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			vec2 p;
			p = vec2(clamp(pos + ivec2(x - 1, y - 1), ivec2(0), sz) * ivec2(2, 2) + ivec2(1, 1)) * conv1_pt;
			V4 sr0 = V4(conv1_gather(p, 0));
			V4 sg0 = V4(conv1_gather(p, 1));
			V4 sb0 = V4(conv1_gather(p, 2));
			V4 sa0 = V4(conv1_gather(p, 3));
			G[0][ay][ax] = V4(sr0.w, sg0.w, sb0.w, sa0.w);
			G[1][ay][ax] = V4(sr0.z, sg0.z, sb0.z, sa0.z);
			G[2][ay][ax] = V4(sr0.x, sg0.x, sb0.x, sa0.x);
			G[3][ay][ax] = V4(sr0.y, sg0.y, sb0.y, sa0.y);
		}
	}
	barrier();
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	V4 r0, r1, r2, r3;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0); r3 = V4(0.0);
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	r0 += M4(-5.941e-02, 4.692e-02, 1.249e-01, -3.524e-01, 1.796e-02, -4.237e-02, -9.219e-02, -4.346e-02, 7.742e-02, -9.570e-02, 1.570e-02, -1.079e-01, 3.345e-02, -1.838e-04, -2.264e-02, 1.497e-01) * s0_0_0;
	r1 += M4(-2.476e-01, 5.274e-02, -3.138e-02, -4.732e-02, 1.073e-01, -2.363e-01, 7.653e-02, -1.597e-01, 1.163e-01, -5.856e-02, 1.228e-01, 1.254e-01, -1.427e-02, -5.880e-03, 3.946e-02, -2.040e-02) * s0_0_0;
	r2 += M4(8.218e-02, 1.219e-01, -4.861e-02, 1.382e-01, -1.818e-01, -5.226e-01, -2.530e-01, -6.699e-01, 3.720e-01, 2.430e-01, 1.075e-01, -5.883e-02, 1.266e-01, 1.244e-01, 7.348e-02, -5.728e-02) * s0_0_0;
	r3 += M4(2.654e-02, 1.627e-01, -1.212e-02, -2.578e-01, 6.235e-02, -1.113e-01, 6.319e-02, 5.737e-02, -7.017e-02, -1.841e-02, 8.168e-02, 2.583e-02, -2.753e-02, 7.054e-02, -2.163e-03, 2.324e-02) * s0_0_0;
	r0 += M4(-5.948e-01, -6.245e-02, 1.142e-02, -2.958e-01, 4.474e-01, 1.558e-01, -8.642e-01, -1.246e-01, 5.612e-01, 7.124e-02, 6.306e-02, -8.794e-02, -8.651e-02, 5.692e-02, 5.751e-02, 3.227e-02) * s0_0_1;
	r1 += M4(6.545e-02, -1.167e-01, 9.501e-02, 2.094e-02, 8.847e-01, 3.705e-01, 1.082e-01, 3.462e-01, 2.761e-02, -1.745e-02, 8.701e-02, 4.357e-02, 1.141e-01, 6.172e-02, 2.455e-03, -7.074e-02) * s0_0_1;
	r2 += M4(-3.004e-01, -2.869e-01, 2.233e-02, 4.110e-02, -1.000e+00, -1.000e+00, -1.000e+00, 1.695e-01, -2.565e-02, -3.259e-01, -1.094e-02, -8.868e-02, 1.425e-01, -2.854e-02, 1.135e-01, 1.904e-02) * s0_0_1;
	r3 += M4(7.076e-02, -1.485e-01, -6.960e-02, -1.099e-02, -1.004e-01, 1.683e-01, -8.651e-03, 1.782e-01, 7.451e-02, 6.931e-02, 7.965e-02, -8.742e-02, 3.847e-03, 5.536e-02, -7.029e-02, 6.046e-02) * s0_0_1;
	r0 += M4(1.552e-01, 1.301e-01, -6.135e-02, -4.103e-01, 1.087e-01, 1.055e-01, -2.435e-02, 7.550e-02, 6.779e-01, 4.454e-02, -1.121e-01, -1.082e-01, 1.410e-01, 2.301e-02, 1.070e-01, -4.370e-02) * s0_0_2;
	r1 += M4(-3.011e-01, 3.228e-02, -9.115e-05, 6.417e-02, -6.280e-01, 1.024e-01, 3.359e-03, 1.624e-02, -2.759e-01, -1.250e-02, 3.778e-02, 6.132e-02, -2.054e-01, 3.196e-03, -1.420e-03, -2.522e-02) * s0_0_2;
	r2 += M4(-1.140e-01, 4.896e-02, -1.882e-01, -6.086e-02, 1.496e-01, -2.772e-01, 2.910e-02, -1.700e-01, 2.194e-01, -1.268e-01, -6.155e-02, -9.466e-02, 1.308e-01, 1.005e-01, 7.450e-02, 5.007e-03) * s0_0_2;
	r3 += M4(-1.207e-02, 6.051e-02, 5.590e-02, -2.270e-01, -2.446e-01, -1.412e-01, 5.241e-01, 4.698e-02, 4.438e-02, 1.818e-01, 8.212e-02, -1.892e-01, 7.329e-03, 1.311e-02, 5.271e-02, 4.241e-02) * s0_0_2;
	r0 += M4(3.794e-01, 2.132e-02, 5.625e-04, -4.732e-01, 1.990e-01, 8.288e-02, -6.613e-02, -9.510e-02, -6.578e-01, -7.683e-02, -1.126e-01, -2.574e-02, -2.197e-01, 7.931e-02, 5.172e-02, 2.973e-01) * s0_1_0;
	r1 += M4(-3.403e-02, 1.648e-02, -2.301e-02, 9.025e-02, 2.822e-01, -1.850e-02, 4.000e-02, -1.260e-01, -3.259e-02, 1.886e-02, 3.095e-02, -1.266e-01, -3.624e-02, -3.102e-02, 2.934e-02, -4.564e-02) * s0_1_0;
	r2 += M4(4.271e-02, -3.340e-01, -1.399e-01, -2.173e-01, -4.319e-03, 4.951e-03, -1.474e-01, -2.495e-01, 1.809e-01, 4.884e-01, 3.058e-01, -5.285e-03, 9.566e-02, 2.069e-01, 1.686e-01, -2.697e-02) * s0_1_0;
	r3 += M4(-3.383e-02, 8.207e-02, -2.801e-02, 5.726e-02, 2.060e-02, 7.793e-02, 4.706e-02, -2.876e-01, -3.655e-02, 1.253e-01, -2.413e-02, -4.867e-01, -8.852e-03, -5.381e-03, -6.500e-02, 1.792e-01) * s0_1_0;
	r0 += M4(-1.196e-02, -7.879e-03, 6.924e-02, -7.522e-01, -4.705e-01, 4.071e-01, 3.759e-01, 2.653e-01, 3.007e-01, -6.394e-03, 8.524e-02, -3.415e-01, -1.406e-01, 1.110e-01, 2.339e-01, 2.747e-01) * s0_1_1;
	r1 += M4(-2.101e-01, -1.313e-01, 3.289e-01, -5.980e-02, 6.069e-01, -1.353e-01, 2.706e-01, 3.877e-01, -2.706e-01, 1.473e-01, 1.133e-01, 3.382e-02, 3.205e-01, 1.139e-01, 9.322e-02, -1.628e-01) * s0_1_1;
	r2 += M4(3.624e-01, -3.676e-01, 2.397e-01, 2.339e-02, -6.948e-01, 2.008e-01, 2.609e-01, 5.914e-03, -2.630e-01, 2.982e-01, 4.103e-02, -6.339e-02, 1.804e-01, 3.442e-02, 9.827e-02, -1.169e-01) * s0_1_1;
	r3 += M4(1.615e-01, -2.568e-02, 1.977e-01, 6.829e-02, 1.251e-01, 2.612e-01, -1.432e-01, 1.538e-01, 2.087e-02, 9.468e-04, -1.853e-01, 8.126e-02, 3.284e-02, 2.123e-01, 1.283e-02, 9.151e-02) * s0_1_1;
	r0 += M4(-2.474e-01, -6.218e-02, -4.449e-02, -3.144e-01, -2.373e-01, 1.583e-01, -1.658e-01, 1.234e-01, 5.169e-01, 1.338e-01, -1.518e-01, -4.388e-02, -2.543e-01, 6.654e-02, 1.716e-01, 1.060e-01) * s0_1_2;
	r1 += M4(-5.477e-01, 1.261e-02, 1.910e-03, 3.346e-03, 2.875e-01, 2.145e-01, 7.439e-02, -7.545e-02, -2.300e-01, 6.591e-02, 8.250e-03, 8.145e-02, 1.677e-01, -1.103e-01, 7.862e-02, -1.495e-01) * s0_1_2;
	r2 += M4(-4.288e-01, 2.868e-01, -2.363e-01, -4.023e-02, -1.017e-01, -1.346e-01, -2.111e-02, 1.169e-01, -7.281e-02, 7.494e-02, -6.703e-02, -6.797e-03, 3.993e-01, 1.325e-01, 1.524e-01, -3.616e-02) * s0_1_2;
	r3 += M4(4.358e-02, -1.843e-02, 6.665e-02, -2.980e-01, -4.556e-01, -9.833e-02, 2.696e-02, 4.556e-02, -1.816e-02, 8.369e-02, 1.875e-02, -1.758e-02, 9.370e-02, 1.563e-01, -4.016e-02, 1.452e-01) * s0_1_2;
	r0 += M4(9.854e-01, -4.272e-02, 6.236e-02, -7.950e-01, -5.778e-01, -1.009e-02, -8.541e-04, -7.354e-02, 4.054e-01, 5.728e-01, -1.086e-01, -7.617e-02, 4.154e-03, -1.072e-01, 1.368e-01, 2.416e-01) * s0_2_0;
	r1 += M4(1.883e-01, -7.444e-02, 8.747e-02, 2.166e-02, -9.291e-02, 7.069e-02, 5.807e-02, -1.178e-01, -1.846e-01, 4.052e-02, 6.678e-03, -1.508e-01, -4.597e-02, -1.913e-02, 3.130e-02, -1.159e-01) * s0_2_0;
	r2 += M4(4.019e-02, 3.836e-02, 8.151e-02, 1.218e-01, -1.218e-02, -2.830e-02, 2.290e-02, 4.178e-02, 8.859e-01, 4.804e-02, 1.414e-01, 8.790e-02, 2.909e-03, 1.927e-01, 1.287e-01, 5.333e-02) * s0_2_0;
	r3 += M4(3.722e-02, 6.815e-02, -1.570e-03, -1.470e-01, 3.676e-02, -2.825e-02, 1.438e-02, -1.440e-01, -3.107e-02, -5.694e-02, -2.225e-02, 1.457e-01, 1.791e-02, -3.344e-02, -5.289e-03, -3.894e-03) * s0_2_0;
	r0 += M4(-7.155e-01, -4.122e-01, -3.447e-01, -6.275e-01, -7.475e-01, -2.122e-01, 9.793e-02, -5.710e-03, 5.404e-01, 2.118e-02, 8.762e-02, -2.801e-02, -9.349e-02, 1.057e-01, 2.042e-02, 7.006e-02) * s0_2_1;
	r1 += M4(2.979e-01, 1.376e-02, 5.389e-02, 2.642e-01, -2.557e-02, -3.725e-02, 4.379e-02, -3.334e-01, -1.328e-01, -5.433e-02, 4.988e-02, 1.958e-03, -1.879e-01, 3.708e-02, -2.197e-02, 4.570e-02) * s0_2_1;
	r2 += M4(-5.663e-01, 3.153e-01, -5.846e-03, -3.369e-02, -4.143e-01, 3.629e-01, 1.193e-01, 5.701e-02, 4.179e-01, 1.899e-01, -1.034e-02, 6.359e-02, 1.781e-01, -6.057e-02, 1.088e-02, 3.436e-03) * s0_2_1;
	r3 += M4(5.487e-02, 3.643e-02, 6.346e-03, -3.258e-01, 4.152e-02, 2.147e-02, -3.069e-02, 1.551e-03, 9.071e-02, 4.407e-02, 1.561e-01, -6.777e-02, 8.090e-02, 1.194e-01, -1.207e-01, 4.788e-02) * s0_2_1;
	r0 += M4(-5.639e-01, 6.665e-02, -6.278e-02, -3.371e-01, 4.371e-02, -1.917e-01, -1.064e-02, 1.460e-01, -7.224e-02, 1.133e-01, 9.594e-02, -5.571e-02, -1.456e-01, -1.771e-02, 2.564e-02, 6.133e-02) * s0_2_2;
	r1 += M4(3.414e-01, -5.378e-03, -6.374e-02, 1.650e-02, -7.624e-02, 5.447e-02, -3.281e-02, -2.067e-01, -1.325e-01, 2.382e-02, -2.539e-02, -6.944e-02, 7.830e-02, -4.355e-02, 2.664e-02, 2.781e-02) * s0_2_2;
	r2 += M4(-6.860e-01, 8.147e-02, 4.285e-02, 4.206e-02, -4.016e-01, 4.060e-02, -1.606e-01, -6.340e-02, -1.520e-01, 1.370e-01, -3.502e-02, 1.285e-01, 1.554e-01, 6.325e-02, 9.273e-02, 4.734e-03) * s0_2_2;
	r3 += M4(3.725e-02, -1.228e-01, 1.329e-02, -5.650e-02, -5.167e-02, 1.027e-02, 1.448e-03, 3.035e-02, -6.113e-02, 1.713e-02, 4.825e-02, 2.807e-02, -9.501e-03, -4.981e-02, 7.071e-02, 1.111e-01) * s0_2_2;
	r0 += M4(-2.089e-01, -4.472e-02, 5.339e-02, -7.565e-02, -1.465e-02, 7.271e-02, 1.223e-02, 1.909e-01, 7.229e-02, -9.016e-03, 9.293e-02, -5.469e-02, -1.000e+00, 6.467e-02, -1.324e-01, 1.963e-01) * s1_0_0;
	r1 += M4(-1.281e-02, 2.452e-03, 9.060e-02, 1.850e-01, 2.641e-02, -2.288e-02, 2.496e-02, -1.194e-02, -1.056e-01, -3.136e-02, 3.663e-03, -2.398e-01, -3.956e-03, -4.114e-02, -3.167e-02, -2.398e-02) * s1_0_0;
	r2 += M4(-1.513e-01, 2.806e-01, 1.735e-01, -5.631e-02, 1.170e-01, -5.175e-02, -8.305e-02, -6.726e-02, 1.563e-01, 1.803e-01, 7.884e-02, -5.767e-02, -8.785e-02, -4.716e-01, -2.622e-01, -4.248e-01) * s1_0_0;
	r3 += M4(-7.395e-03, 4.476e-02, 4.342e-02, -1.135e-01, 6.885e-03, -3.997e-03, -6.441e-02, 4.633e-02, 3.001e-02, 2.073e-01, 4.191e-02, -2.175e-01, 1.408e-02, -2.706e-01, 5.718e-03, 3.061e-01) * s1_0_0;
	r0 += M4(-6.209e-02, 1.134e-02, 1.841e-01, -1.851e-01, 4.561e-01, -1.508e-02, -9.178e-02, 3.217e-02, -1.009e-01, -9.036e-02, 3.343e-02, -1.048e-01, -9.241e-02, 2.985e-02, -3.755e-01, 1.674e-01) * s1_0_1;
	r1 += M4(2.228e-01, 4.917e-02, 1.954e-01, 9.220e-02, 2.035e-01, 7.869e-03, 1.590e-02, -4.658e-02, -6.575e-02, -1.153e-03, -7.391e-02, 6.391e-02, -8.800e-02, -2.356e-02, -9.948e-02, 1.356e-02) * s1_0_1;
	r2 += M4(-6.593e-01, 2.344e-01, 1.593e-01, 7.573e-02, 8.917e-02, -6.921e-02, -1.489e-01, -1.490e-01, -2.575e-01, -5.488e-01, -1.216e-02, -2.589e-01, 6.859e-01, -5.120e-01, -3.054e-01, 3.241e-01) * s1_0_1;
	r3 += M4(3.056e-02, -1.073e-01, 1.612e-01, -1.837e-01, 3.469e-02, 1.130e-01, -7.544e-02, 9.820e-02, -7.629e-02, 1.624e-01, -2.494e-01, -1.244e-01, 1.553e-01, -2.301e-01, -1.763e-02, 2.114e-01) * s1_0_1;
	r0 += M4(-1.973e-02, -3.393e-03, 1.515e-02, 2.971e-02, 3.932e-01, 3.634e-02, -6.761e-04, -1.191e-01, -3.004e-01, 8.889e-02, 2.548e-01, -8.623e-02, 5.471e-01, -5.796e-02, -3.982e-01, 4.901e-02) * s1_0_2;
	r1 += M4(4.228e-01, 1.186e-01, 1.549e-02, 1.365e-01, 6.193e-02, -1.467e-03, 1.998e-02, 5.813e-02, 2.404e-02, 1.567e-01, 1.676e-01, -2.598e-03, -1.264e-01, -7.307e-02, -1.947e-01, -4.445e-02) * s1_0_2;
	r2 += M4(-6.176e-02, -1.420e-01, -2.892e-02, -7.150e-02, -3.366e-02, 5.768e-02, 1.012e-02, 4.733e-02, 4.358e-03, 2.026e-01, -1.566e-02, -1.023e-01, 1.453e-01, -3.402e-01, -2.130e-01, 2.582e-02) * s1_0_2;
	r3 += M4(4.171e-02, 8.041e-02, 1.920e-02, -1.004e-01, -6.193e-02, 2.305e-02, 1.754e-01, -3.331e-02, -8.031e-02, -1.357e-01, -9.130e-02, -3.779e-01, -7.740e-02, 1.958e-01, 2.678e-02, 2.605e-01) * s1_0_2;
	r0 += M4(-1.000e+00, -2.636e-01, 3.970e-02, 1.139e-01, 4.114e-01, -3.186e-02, 4.672e-02, 6.982e-02, -2.607e-01, 5.346e-02, 9.064e-02, -2.324e-01, 5.438e-02, 8.805e-02, -4.283e-02, 2.187e-01) * s1_1_0;
	r1 += M4(-1.452e-01, -9.595e-02, 1.797e-02, 2.747e-01, 5.427e-02, -2.787e-02, 2.692e-02, 2.475e-02, -4.634e-02, 4.116e-03, 4.286e-02, 1.620e-02, 2.430e-01, 4.937e-03, -8.090e-03, -1.230e-01) * s1_1_0;
	r2 += M4(-7.803e-02, 2.536e-02, 2.315e-01, -7.674e-02, 4.018e-02, -6.664e-02, -3.132e-02, -4.925e-02, -1.056e-01, -2.113e-01, -9.561e-02, -4.035e-01, -2.135e-01, -2.468e-01, -1.047e-01, 2.034e-01) * s1_1_0;
	r3 += M4(-1.281e-02, 9.197e-02, -5.057e-02, 1.499e-01, 4.262e-02, 9.457e-03, -7.307e-02, -9.609e-03, 8.248e-02, 1.919e-01, -3.753e-02, 1.634e-01, 2.306e-02, -5.335e-02, -1.382e-03, 1.682e-02) * s1_1_0;
	r0 += M4(-2.711e-01, -4.599e-01, 5.479e-01, -2.342e-01, 7.188e-01, -1.370e-02, -9.268e-02, -7.765e-02, 1.530e-01, 3.001e-01, 8.115e-02, -4.147e-01, 5.468e-02, 3.369e-01, -3.344e-01, 2.251e-01) * s1_1_1;
	r1 += M4(-6.933e-01, 8.538e-03, 4.799e-02, -4.869e-02, 1.705e-01, -1.536e-02, 1.018e-01, 4.455e-02, 8.604e-01, 2.682e-01, 1.993e-02, 4.126e-01, -6.103e-01, -1.873e-01, -1.255e-01, 2.705e-01) * s1_1_1;
	r2 += M4(4.075e-01, -6.545e-01, 6.111e-01, 1.944e-01, 7.150e-02, -4.034e-02, -1.260e-01, 3.422e-02, 2.016e-01, -1.347e-01, 1.034e-01, 3.778e-01, -1.000e+00, -8.231e-02, -5.289e-01, -1.242e-02) * s1_1_1;
	r3 += M4(-4.042e-01, -1.725e-01, 8.771e-02, -1.499e-01, -1.093e-03, 2.827e-02, -1.625e-01, 3.138e-02, -2.999e-02, 4.279e-01, -2.225e-01, -1.409e-01, 1.389e-01, -1.484e-01, 2.056e-01, 2.528e-01) * s1_1_1;
	r0 += M4(2.292e-02, -8.926e-02, -6.378e-02, -3.444e-02, 3.634e-01, -9.084e-04, 8.036e-03, 9.376e-03, -4.600e-01, 1.194e-01, 3.586e-01, -2.551e-01, 3.900e-01, 1.755e-02, -6.215e-01, 2.495e-01) * s1_1_2;
	r1 += M4(1.147e-01, 1.513e-01, 1.256e-01, 2.634e-01, 1.388e-01, -3.634e-02, 3.994e-02, 6.131e-02, 6.971e-01, 5.995e-01, 4.729e-01, 1.079e-01, -2.329e-01, -1.017e-01, -3.007e-01, 2.176e-01) * s1_1_2;
	r2 += M4(-1.040e-01, -7.636e-02, -7.247e-02, -2.511e-02, 2.144e-01, 8.770e-02, 3.207e-02, -4.856e-02, -1.538e-01, 1.754e-01, 2.629e-01, 1.757e-01, 4.307e-01, -8.037e-02, -4.689e-01, -8.098e-02) * s1_1_2;
	r3 += M4(-3.113e-01, -1.030e-01, -1.482e-01, -2.957e-02, -1.380e-02, -1.861e-02, 1.685e-01, 1.894e-02, -4.390e-01, 8.016e-02, -1.549e-01, -1.173e-01, -1.469e-01, 6.767e-02, 3.192e-01, 3.300e-01) * s1_1_2;
	r0 += M4(-2.590e-01, -3.707e-02, -2.832e-02, -1.378e-01, 4.195e-01, -6.099e-03, 5.214e-02, 1.594e-01, -3.889e-01, -1.284e-01, 1.666e-01, -1.278e-01, 2.266e-01, -5.762e-02, 1.268e-02, 2.866e-01) * s1_2_0;
	r1 += M4(1.724e-01, -4.087e-02, 4.102e-03, 1.235e-01, 2.645e-02, 1.013e-02, 6.555e-02, -7.343e-02, -1.899e-01, 1.863e-02, -1.886e-02, -1.074e-01, 5.304e-02, 1.068e-02, 8.079e-02, -1.119e-01) * s1_2_0;
	r2 += M4(2.087e-01, 1.411e-01, -1.367e-02, 6.393e-02, 1.039e-01, 2.445e-01, -2.339e-02, 6.541e-02, 5.558e-02, 5.039e-02, 1.392e-01, -1.273e-02, 4.103e-02, 4.564e-02, -3.936e-02, 2.249e-02) * s1_2_0;
	r3 += M4(-2.740e-02, -2.088e-03, 3.777e-02, 6.318e-02, 3.357e-02, 1.149e-01, -3.650e-02, -4.209e-02, 5.460e-02, -6.312e-02, -9.326e-03, 7.435e-02, -2.127e-02, 1.667e-02, -4.361e-02, -1.957e-01) * s1_2_0;
	r0 += M4(-1.225e-01, -5.352e-01, 1.580e-01, 1.829e-01, 5.530e-01, -1.170e-01, -3.609e-03, -2.951e-01, 8.044e-02, 2.692e-02, -1.494e-02, -3.139e-01, -1.764e-01, -4.454e-01, -1.235e-01, 5.109e-01) * s1_2_1;
	r1 += M4(-2.477e-02, -1.240e-02, 3.533e-02, 4.010e-02, 1.537e-01, -2.485e-02, 7.146e-02, -3.122e-02, -1.227e-01, 1.274e-01, 1.248e-02, -3.264e-02, 2.553e-01, -1.439e-01, 8.734e-02, -8.422e-02) * s1_2_1;
	r2 += M4(5.908e-01, -1.756e-01, 1.334e-01, 6.752e-02, 2.218e-01, 3.259e-01, -2.434e-02, 1.250e-01, -3.212e-01, 1.489e-01, -5.616e-02, -3.469e-02, -8.230e-01, 2.356e-01, 1.055e-01, -2.346e-03) * s1_2_1;
	r3 += M4(-7.352e-02, -1.588e-02, 1.243e-01, -5.825e-03, 8.771e-02, 4.854e-02, 4.464e-02, 1.030e-02, -9.643e-02, 8.095e-02, -9.593e-02, -3.422e-03, 1.255e-01, 1.583e-02, -2.510e-02, 1.152e-01) * s1_2_1;
	r0 += M4(3.866e-01, -5.425e-02, -6.670e-02, -2.190e-02, 5.288e-02, 5.653e-02, -6.296e-02, 1.325e-01, 4.770e-01, -6.465e-01, 1.623e-01, -2.765e-02, -1.000e+00, 2.767e-01, -3.830e-01, 2.714e-01) * s1_2_2;
	r1 += M4(1.452e-01, -5.601e-02, 5.085e-02, 1.716e-02, -1.364e-03, 1.281e-02, 5.384e-03, 6.554e-02, 2.073e-01, 7.661e-02, 1.771e-02, -7.232e-02, -1.383e-01, -3.724e-02, -4.640e-02, 2.356e-02) * s1_2_2;
	r2 += M4(-2.832e-01, 1.335e-01, -1.059e-01, -1.607e-02, 3.118e-01, 1.887e-01, 2.894e-02, -1.979e-02, 5.389e-02, 2.326e-01, -6.764e-02, -9.686e-02, -3.128e-01, -9.617e-02, -7.661e-02, -2.908e-03) * s1_2_2;
	r3 += M4(-3.627e-03, 1.330e-02, -1.047e-01, -4.812e-02, -1.372e-01, -3.956e-02, -1.789e-02, 1.452e-01, 7.041e-02, 8.161e-03, 1.630e-01, -1.208e-01, -3.189e-01, -5.539e-02, -1.245e-01, 3.202e-01) * s1_2_2;
	s0_0_0 = G[2][xy.y+0][xy.x+0]; s0_0_1 = G[2][xy.y+0][xy.x+1];
	s0_0_2 = G[2][xy.y+0][xy.x+2]; s0_1_0 = G[2][xy.y+1][xy.x+0];
	s0_1_1 = G[2][xy.y+1][xy.x+1]; s0_1_2 = G[2][xy.y+1][xy.x+2];
	s0_2_0 = G[2][xy.y+2][xy.x+0]; s0_2_1 = G[2][xy.y+2][xy.x+1];
	s0_2_2 = G[2][xy.y+2][xy.x+2]; s1_0_0 = G[3][xy.y+0][xy.x+0];
	s1_0_1 = G[3][xy.y+0][xy.x+1]; s1_0_2 = G[3][xy.y+0][xy.x+2];
	s1_1_0 = G[3][xy.y+1][xy.x+0]; s1_1_1 = G[3][xy.y+1][xy.x+1];
	s1_1_2 = G[3][xy.y+1][xy.x+2]; s1_2_0 = G[3][xy.y+2][xy.x+0];
	s1_2_1 = G[3][xy.y+2][xy.x+1]; s1_2_2 = G[3][xy.y+2][xy.x+2];
	r0 += M4(1.977e-01, -1.185e-01, -1.028e-01, -5.354e-02, 5.291e-04, 1.004e-02, 4.064e-02, -3.286e-02, -2.788e-01, -9.175e-03, 2.876e-02, -6.307e-02, -1.940e-01, 9.649e-02, -1.536e-02, -2.027e-01) * s0_0_0;
	r1 += M4(-2.251e-01, 4.235e-02, 6.444e-03, 6.864e-02, -1.870e-02, -1.375e-02, -4.166e-02, -7.481e-02, 3.506e-03, -9.950e-03, 4.624e-02, -8.922e-03, 6.427e-01, 2.801e-02, 1.414e-03, -1.529e-01) * s0_0_0;
	r2 += M4(3.969e-02, 1.771e-01, 1.148e-01, 2.902e-01, 1.392e-01, -2.761e-01, -5.041e-02, -3.047e-01, 7.532e-02, 5.076e-02, 1.623e-01, 8.849e-02, 1.046e-01, -1.555e-01, -2.282e-01, -5.228e-01) * s0_0_0;
	r3 += M4(-5.708e-02, -6.413e-02, 4.024e-02, 2.346e-02, -2.596e-02, 1.189e-01, -9.309e-02, -7.802e-02, 2.807e-02, 1.092e-02, 3.263e-02, -1.047e-01, -6.620e-02, 2.651e-01, 5.381e-02, 1.517e-01) * s0_0_0;
	r0 += M4(-5.094e-01, -1.301e-02, -1.257e-01, -1.085e-01, 2.530e-02, -1.038e-01, -4.024e-02, 3.261e-02, -4.623e-02, 1.319e-02, -6.663e-02, -2.888e-02, 2.125e-01, 1.472e-02, 3.347e-01, -1.120e-01) * s0_0_1;
	r1 += M4(-4.752e-01, -5.840e-02, -1.415e-01, 3.810e-02, -3.960e-01, 4.211e-02, 3.612e-02, -7.063e-02, 6.206e-02, 1.035e-01, -1.733e-01, -8.669e-02, -4.727e-01, 5.085e-02, 7.671e-02, 1.136e-01) * s0_0_1;
	r2 += M4(-4.749e-03, 1.276e-01, 2.631e-01, 1.841e-01, -9.131e-03, -1.056e-01, -7.352e-02, -1.045e-01, 4.218e-01, 1.111e-01, 2.084e-01, 4.435e-01, -1.069e-02, -1.994e-01, -1.303e-01, 4.754e-03) * s0_0_1;
	r3 += M4(-1.421e-03, -1.644e-01, -1.478e-01, 1.678e-01, 3.799e-02, -1.289e-02, -4.313e-02, -1.190e-01, -1.197e-01, 1.953e-01, -1.967e-02, 2.686e-01, 4.829e-02, 4.209e-02, 4.029e-01, 2.073e-01) * s0_0_1;
	r0 += M4(-1.232e-01, -1.004e-01, 2.989e-02, 2.396e-01, -2.970e-01, 3.748e-02, -6.785e-02, -1.281e-01, 3.648e-02, 4.863e-03, 1.154e-01, 2.800e-03, 1.677e-01, -3.589e-02, 3.115e-02, 9.491e-02) * s0_0_2;
	r1 += M4(-6.359e-01, 1.792e-02, -1.041e-01, -4.799e-02, 2.491e-01, -5.678e-03, -8.420e-02, -8.313e-02, -9.441e-01, -5.076e-02, -7.668e-02, -3.710e-02, 8.081e-02, -7.647e-02, 6.418e-02, -4.630e-02) * s0_0_2;
	r2 += M4(-6.370e-02, -1.154e-01, -3.212e-02, -1.455e-01, 1.912e-02, 3.630e-02, 2.943e-02, 1.308e-02, -1.931e-02, 8.677e-02, 5.738e-02, 8.604e-02, -8.678e-02, -2.050e-01, -1.059e-01, -1.160e-01) * s0_0_2;
	r3 += M4(3.943e-02, -1.305e-01, -7.235e-03, -6.563e-02, -9.144e-02, -1.489e-02, 2.720e-03, -4.492e-02, 1.141e-01, -8.825e-02, 9.642e-02, 2.742e-01, 3.592e-03, 1.113e-01, -2.058e-02, -1.344e-01) * s0_0_2;
	r0 += M4(-2.245e-01, 3.464e-02, -9.221e-02, 6.841e-02, -5.077e-02, 1.958e-02, 1.644e-02, -3.313e-02, -1.104e-01, 1.520e-01, 5.838e-02, -1.430e-01, -2.996e-01, 6.631e-02, -1.127e-01, 6.854e-01) * s0_1_0;
	r1 += M4(-3.550e-02, -1.604e-02, -9.586e-02, 1.013e-01, -8.232e-02, 6.184e-02, 1.366e-02, -4.083e-02, 2.960e-01, -3.075e-02, 8.432e-02, 6.632e-02, -3.779e-01, -1.495e-01, 1.857e-01, -3.639e-02) * s0_1_0;
	r2 += M4(2.290e-01, 8.751e-02, -1.808e-01, 3.572e-02, 9.928e-02, 6.798e-02, -1.645e-01, -2.452e-01, -2.786e-01, -3.744e-03, 2.569e-01, -4.406e-02, -6.856e-01, 7.035e-02, -2.812e-01, -2.056e-01) * s0_1_0;
	r3 += M4(-2.977e-03, -5.959e-02, 9.963e-03, 8.654e-02, -5.431e-02, 1.168e-01, -1.130e-02, -1.798e-01, 4.450e-02, -1.167e-01, 3.334e-02, 4.179e-02, 1.418e-01, -2.276e-01, 6.340e-02, 2.505e-01) * s0_1_0;
	r0 += M4(-4.752e-01, -3.119e-02, -1.665e-01, 2.119e-01, -5.405e-01, 6.580e-02, 2.669e-01, -3.154e-01, -3.058e-01, -2.861e-01, 6.267e-02, 1.559e-01, -3.500e-01, 6.339e-02, 1.694e-01, 5.722e-03) * s0_1_1;
	r1 += M4(-4.300e-01, 1.459e-01, -1.094e-01, -5.913e-01, -3.139e-01, -3.192e-01, 5.263e-01, 1.720e-02, -5.993e-02, 5.868e-02, 1.931e-01, 5.164e-02, -1.000e+00, -2.000e-01, -2.535e-01, 1.467e-01) * s0_1_1;
	r2 += M4(6.368e-01, 9.764e-02, -1.745e-01, -3.831e-01, 5.213e-01, -1.316e-01, -3.662e-01, -2.228e-01, -5.686e-01, -1.173e-01, 1.980e-01, 2.208e-02, -3.834e-01, 1.150e-01, -1.525e-02, -2.538e-02) * s0_1_1;
	r3 += M4(-3.395e-01, -4.706e-02, 1.753e-02, -3.660e-01, 3.820e-01, -5.673e-01, -5.284e-02, -2.808e-01, -7.355e-02, 6.990e-02, 7.813e-03, 8.869e-03, 6.358e-01, -8.142e-02, 6.695e-02, 3.279e-03) * s0_1_1;
	r0 += M4(-4.547e-01, 9.589e-02, 2.912e-02, -1.479e-01, -4.548e-02, -1.830e-01, -8.745e-02, -7.081e-02, -4.516e-01, 5.109e-02, -1.714e-01, 1.486e-01, -9.216e-02, 1.026e-01, 1.199e-01, -1.814e-01) * s0_1_2;
	r1 += M4(-5.498e-01, -3.384e-01, -1.491e-01, 7.662e-02, 1.493e-02, 2.212e-01, 1.112e-04, 2.003e-01, -2.782e-01, -2.765e-01, -2.273e-01, -5.405e-02, 4.441e-01, 9.527e-02, 9.594e-02, -3.713e-02) * s0_1_2;
	r2 += M4(-5.606e-01, -7.487e-01, 3.331e-01, -1.186e-01, -2.322e-01, 2.196e-01, -2.896e-01, 6.912e-02, 2.603e-01, -1.255e-01, 3.608e-02, -9.706e-02, -3.066e-01, -1.796e-02, -4.390e-02, 4.708e-02) * s0_1_2;
	r3 += M4(4.816e-01, -4.752e-02, 1.697e-01, -2.455e-03, -2.198e-01, 1.623e-01, 2.063e-01, -4.784e-02, 2.116e-01, -1.303e-01, 2.238e-01, 1.356e-01, -1.209e-01, 1.299e-01, 4.342e-03, 1.760e-02) * s0_1_2;
	r0 += M4(1.392e-01, 4.121e-03, 2.835e-02, 8.556e-02, -2.237e-01, -2.132e-01, 5.018e-02, -1.975e-01, -6.328e-02, 1.341e-02, 1.276e-01, -9.851e-02, 1.636e-01, -3.280e-01, -1.616e-01, -3.037e-01) * s0_2_0;
	r1 += M4(2.335e-01, 1.541e-02, -4.478e-02, -1.368e-02, -4.918e-02, 1.834e-02, -3.776e-02, -1.894e-02, 1.192e-01, 1.002e-02, 4.879e-02, -3.592e-02, 4.349e-02, -9.587e-03, -5.760e-02, 1.311e-01) * s0_2_0;
	r2 += M4(-1.968e-01, 1.289e-02, 8.507e-02, 1.868e-01, -9.330e-02, -2.881e-01, -1.257e-01, -1.077e-01, 2.543e-01, 9.072e-02, 2.644e-02, -1.173e-01, 2.588e-01, -2.056e-01, -1.825e-01, 5.414e-02) * s0_2_0;
	r3 += M4(-7.412e-02, -8.841e-02, 6.009e-02, -7.191e-02, -2.084e-03, 9.053e-02, 2.083e-03, 2.084e-01, 1.127e-02, -1.016e-01, 2.577e-02, -1.647e-01, 3.892e-02, 5.071e-02, -1.709e-02, 2.849e-03) * s0_2_0;
	r0 += M4(6.834e-01, 4.752e-01, 2.256e-01, -1.607e-01, 4.678e-01, 3.818e-01, -1.203e-02, -4.781e-01, -5.992e-01, -5.721e-01, 1.421e-01, 1.826e-01, 2.615e-01, 5.932e-01, 1.576e-02, -6.253e-02) * s0_2_1;
	r1 += M4(-4.210e-01, 7.051e-02, -1.950e-02, -8.439e-02, 2.985e-01, 1.277e-01, -1.656e-01, 5.572e-02, 3.623e-02, -4.160e-02, 2.843e-01, -2.398e-01, -1.215e-01, -1.412e-01, 8.427e-02, 1.456e-01) * s0_2_1;
	r2 += M4(-7.664e-02, 3.161e-02, -3.199e-02, 2.612e-02, -1.000e+00, -8.811e-03, -3.180e-01, 1.060e-01, 1.649e-01, -3.147e-02, 1.333e-01, 9.132e-02, 2.815e-01, -1.214e-01, 1.276e-01, 4.097e-02) * s0_2_1;
	r3 += M4(1.425e-02, 1.396e-01, 4.298e-02, -5.528e-02, -1.890e-01, -1.058e-01, 3.797e-02, -1.164e-01, 5.100e-02, -1.214e-02, 1.704e-02, -1.855e-01, 2.976e-02, 1.908e-02, -5.844e-02, -2.297e-02) * s0_2_1;
	r0 += M4(-9.576e-02, 9.398e-02, 1.452e-02, -3.603e-01, 3.201e-01, -2.632e-01, 5.307e-03, -1.111e-01, -1.626e-01, 6.038e-02, -4.520e-02, 7.013e-02, 4.991e-01, -2.577e-01, -2.539e-02, -2.439e-02) * s0_2_2;
	r1 += M4(-2.716e-01, -8.120e-02, 3.471e-02, 1.695e-01, 4.640e-01, 5.040e-02, -2.514e-01, -4.968e-02, -2.379e-01, -8.067e-02, 9.475e-02, -4.792e-02, -6.062e-02, -3.260e-02, -4.449e-02, 4.893e-02) * s0_2_2;
	r2 += M4(4.433e-02, 3.043e-01, 1.333e-01, -2.389e-03, -7.001e-01, 7.776e-02, -2.752e-01, -4.258e-02, 1.290e-01, -6.207e-03, 3.666e-02, -3.202e-02, 1.754e-01, 1.782e-01, 3.799e-03, 1.737e-02) * s0_2_2;
	r3 += M4(1.853e-01, -2.100e-01, -2.154e-02, 6.701e-02, 9.543e-02, 1.112e-01, 1.120e-01, -1.802e-01, -7.771e-02, 2.283e-03, 8.802e-04, -5.790e-02, 9.120e-03, -5.461e-02, 1.212e-01, 2.945e-02) * s0_2_2;
	r0 += M4(2.153e-01, -6.381e-02, 1.255e-03, -5.100e-04, -2.562e-01, -2.341e-02, -6.817e-02, -3.467e-02, 2.054e-01, 4.740e-02, 1.464e-01, 1.693e-01, 3.279e-01, -3.958e-02, -9.341e-02, 3.260e-02) * s1_0_0;
	r1 += M4(9.970e-02, -2.024e-03, 1.834e-03, 3.732e-02, -4.165e-02, 1.277e-02, -5.090e-02, 2.219e-01, -3.527e-01, -6.396e-02, 4.583e-02, -3.018e-01, -1.288e-01, 3.337e-02, -3.158e-02, -1.363e-01) * s1_0_0;
	r2 += M4(1.246e-01, -1.025e-01, 2.424e-02, 1.591e-01, -8.418e-04, -9.383e-03, 4.735e-02, 7.704e-03, 1.833e-01, 5.632e-02, -2.957e-02, -1.768e-01, 2.812e-01, -2.192e-01, -1.115e-01, -4.154e-01) * s1_0_0;
	r3 += M4(-1.753e-02, -2.176e-02, -4.671e-03, -9.633e-02, -5.470e-02, -1.143e-01, -3.541e-02, -4.206e-03, 6.849e-02, -8.360e-02, -1.174e-01, -1.409e-01, -2.044e-02, 2.266e-02, -1.838e-02, 1.134e-01) * s1_0_0;
	r0 += M4(1.650e-01, -5.601e-02, 6.616e-02, -3.001e-02, -2.067e-02, -3.100e-02, 7.919e-02, 9.093e-02, -9.359e-02, 3.090e-02, -3.101e-01, 3.099e-02, 2.608e-01, 2.192e-02, -1.668e-02, -1.353e-01) * s1_0_1;
	r1 += M4(-1.648e-01, -6.368e-02, 1.128e-01, -1.758e-02, 2.688e-02, 1.052e-02, 2.521e-01, -2.029e-03, 3.229e-01, -3.675e-02, -8.619e-02, 5.384e-02, 2.887e-01, 1.913e-03, 1.541e-03, -1.058e-02) * s1_0_1;
	r2 += M4(1.990e-01, 4.230e-01, 9.309e-02, 2.430e-01, -3.970e-01, 6.692e-01, 1.297e-02, 8.238e-02, 1.091e-01, -2.944e-01, -6.388e-02, -1.158e-01, 6.190e-03, -7.006e-01, -9.030e-02, -2.760e-01) * s1_0_1;
	r3 += M4(1.014e-01, -2.519e-01, -3.625e-04, -2.143e-01, 6.404e-02, -2.218e-01, 1.236e-01, -2.448e-01, -2.736e-02, 6.577e-02, -1.801e-01, 1.058e-01, 4.485e-02, 8.325e-02, 2.298e-02, 1.280e-01) * s1_0_1;
	r0 += M4(-1.382e-01, 1.334e-01, -6.092e-02, 2.411e-02, -2.557e-01, -5.878e-02, -2.806e-02, -8.615e-02, -2.849e-02, 7.883e-02, -1.869e-02, -7.884e-02, 8.056e-02, -4.923e-02, 3.330e-02, 2.508e-02) * s1_0_2;
	r1 += M4(5.972e-01, -1.118e-02, 1.056e-01, -9.772e-02, 7.975e-02, -2.506e-02, 1.135e-01, -1.540e-01, -2.361e-01, 1.277e-02, -9.076e-02, 2.415e-02, 5.097e-02, 2.853e-02, 2.134e-02, 2.832e-02) * s1_0_2;
	r2 += M4(2.859e-02, 2.148e-01, 1.873e-02, 1.311e-01, -1.404e-01, 2.432e-01, 2.363e-02, 1.698e-01, 1.960e-01, 1.414e-01, 1.270e-01, 5.851e-02, 1.005e-01, -1.935e-01, -5.779e-02, -4.237e-02) * s1_0_2;
	r3 += M4(5.859e-02, 1.901e-02, -1.698e-02, -2.439e-01, 1.005e-01, -4.053e-02, -1.410e-01, -1.001e-01, -1.014e-01, -1.547e-01, 1.330e-01, -1.960e-02, -1.055e-01, 9.808e-02, 1.668e-01, -1.510e-01) * s1_0_2;
	r0 += M4(-1.381e-01, -6.797e-02, -2.223e-02, -1.298e-01, -3.367e-01, 2.795e-02, -7.954e-02, -1.776e-03, 3.768e-01, 2.092e-01, 4.014e-02, -1.139e-01, 2.287e-01, 1.814e-01, -6.529e-02, 1.826e-01) * s1_1_0;
	r1 += M4(-1.636e-01, 6.086e-02, -6.219e-02, -1.750e-02, 3.753e-01, 7.163e-02, 6.164e-02, -2.532e-02, 4.995e-01, -2.714e-02, 4.978e-02, -5.271e-02, -6.115e-02, 3.676e-02, 1.511e-02, -1.157e-01) * s1_1_0;
	r2 += M4(1.446e-01, 1.060e-01, 3.846e-02, 3.298e-01, 4.265e-01, 2.836e-01, 1.313e-01, 4.059e-01, -3.504e-01, -3.751e-01, -1.958e-01, 2.237e-01, -2.245e-01, 9.277e-02, -1.815e-01, 1.289e-01) * s1_1_0;
	r3 += M4(-7.449e-02, 4.567e-02, 6.049e-02, 5.745e-02, -8.503e-02, -8.501e-02, 3.889e-02, -1.493e-01, 5.490e-02, 4.668e-02, -4.707e-02, -1.141e-01, -6.173e-02, -1.441e-01, 8.832e-02, 8.998e-02) * s1_1_0;
	r0 += M4(-4.594e-03, 1.127e-01, 3.648e-02, -1.359e-01, 4.657e-01, -2.694e-02, 5.254e-01, -4.338e-01, 1.157e-01, 2.628e-01, 2.525e-01, 1.246e-01, -6.458e-02, 1.964e-01, -9.395e-02, 3.603e-01) * s1_1_1;
	r1 += M4(-2.811e-01, 2.098e-01, -2.709e-01, -1.334e-01, -6.464e-01, -1.040e-01, -2.866e-01, -3.602e-01, -7.567e-01, -6.431e-02, -1.441e-01, 4.599e-01, 9.160e-01, -1.265e-01, -4.025e-02, 3.729e-01) * s1_1_1;
	r2 += M4(-1.861e-01, 5.516e-01, 7.243e-02, -1.358e-01, 1.108e-01, -1.293e-01, 4.512e-01, 2.929e-01, 8.920e-02, 3.386e-02, 2.872e-01, 1.796e-01, -4.888e-01, 5.316e-02, -2.474e-01, 7.581e-02) * s1_1_1;
	r3 += M4(-4.405e-02, -2.194e-01, -1.959e-01, -1.149e-02, 1.534e-01, 6.774e-01, 1.905e-01, 7.310e-01, 2.302e-01, 3.390e-01, -3.135e-02, 2.868e-01, 1.329e-01, 2.730e-01, 6.502e-02, 1.223e-01) * s1_1_1;
	r0 += M4(-1.458e-01, -8.137e-02, 2.041e-01, 2.663e-01, 1.823e-01, -1.087e-02, -1.465e-01, 5.394e-02, -2.504e-01, -5.953e-02, -1.957e-01, 1.337e-01, -3.602e-01, 6.612e-02, -6.318e-03, -1.441e-01) * s1_1_2;
	r1 += M4(-7.964e-01, -7.818e-02, 6.413e-02, -2.122e-01, -3.297e-01, 1.525e-01, -6.010e-02, -1.369e-02, -2.763e-01, 9.098e-02, -4.987e-02, 1.925e-01, -1.000e+00, 6.274e-02, -7.683e-02, -9.852e-02) * s1_1_2;
	r2 += M4(-2.165e-01, -2.276e-01, 8.192e-02, 5.124e-02, 1.835e-01, -1.062e-02, 1.653e-02, -7.595e-02, 4.693e-01, 2.312e-02, -3.358e-02, -4.663e-02, -2.344e-01, -9.404e-02, -2.636e-02, -2.372e-02) * s1_1_2;
	r3 += M4(-4.003e-02, -2.644e-02, -3.419e-01, -1.832e-01, -1.592e-01, -3.154e-02, -1.978e-01, -1.358e-01, -1.057e-01, -5.272e-02, 2.499e-01, 2.130e-01, -2.245e-01, 8.632e-03, 3.272e-01, 7.964e-02) * s1_1_2;
	r0 += M4(4.141e-01, -2.366e-02, 1.561e-01, 2.159e-01, 1.204e-01, 8.906e-02, -1.508e-01, -2.323e-02, -6.090e-01, -1.178e-01, 9.642e-02, -2.246e-02, -1.289e-01, -9.890e-02, 1.059e-01, -1.617e-01) * s1_2_0;
	r1 += M4(-1.705e-01, 2.888e-02, -3.012e-02, 2.368e-02, 2.021e-01, -3.226e-02, -9.364e-03, 1.880e-01, -1.809e-01, -9.297e-03, 9.441e-02, -2.997e-01, -2.326e-01, 2.629e-02, -1.125e-02, -3.016e-01) * s1_2_0;
	r2 += M4(-3.125e-02, 6.800e-02, 2.247e-01, 5.354e-02, -2.356e-01, 8.789e-02, -9.573e-02, -1.213e-01, 2.797e-01, -3.453e-03, 1.409e-02, -1.001e-01, -4.231e-01, -1.225e-01, 8.184e-02, 9.148e-02) * s1_2_0;
	r3 += M4(1.813e-02, -1.458e-02, 1.562e-02, 2.065e-01, -7.414e-02, -4.309e-02, 2.106e-03, -9.677e-03, 2.868e-02, -4.948e-02, -1.273e-01, -3.943e-01, 4.464e-02, -5.336e-03, 3.306e-02, -3.616e-02) * s1_2_0;
	r0 += M4(5.443e-01, 3.297e-01, -7.887e-02, -1.184e-01, -1.918e-01, -6.501e-01, -5.492e-03, 4.031e-01, 1.178e-02, -5.989e-01, -1.809e-02, 1.885e-01, 1.000e+00, -1.000e+00, -6.624e-01, -2.007e-01) * s1_2_1;
	r1 += M4(-1.873e-01, 5.193e-02, -2.128e-01, 2.202e-01, 2.394e-01, 3.542e-02, -2.095e-02, 1.994e-01, -1.219e-01, -1.180e-01, 3.061e-01, -1.575e-01, -1.246e-01, 9.317e-02, -1.091e-01, 3.603e-02) * s1_2_1;
	r2 += M4(3.420e-01, -2.592e-01, -2.687e-02, 3.757e-02, 6.927e-02, -4.446e-01, -7.390e-02, -1.751e-01, -4.209e-01, 2.481e-01, 6.399e-02, 1.018e-01, -1.000e+00, 5.693e-01, -2.569e-01, 2.626e-01) * s1_2_1;
	r3 += M4(-2.767e-02, 1.289e-02, -4.759e-03, 1.810e-01, -3.801e-02, -2.541e-03, 8.050e-02, 1.036e-01, 9.768e-02, 4.499e-02, -1.611e-02, 1.313e-01, 3.550e-02, 7.861e-02, -1.781e-01, 6.164e-02) * s1_2_1;
	r0 += M4(-2.397e-01, 1.875e-01, 3.659e-02, -2.443e-01, -2.036e-01, 2.947e-01, -2.484e-02, -8.198e-02, -1.539e-01, -6.543e-02, -9.618e-02, 2.075e-01, -2.375e-02, 3.077e-03, -5.865e-02, -2.154e-01) * s1_2_2;
	r1 += M4(-3.944e-02, 7.211e-02, -1.348e-02, 2.768e-02, -3.817e-02, 1.435e-02, 5.919e-02, 4.234e-02, 6.459e-02, 3.676e-02, -7.724e-02, -1.078e-01, -4.329e-02, -7.328e-02, -1.093e-01, 8.426e-02) * s1_2_2;
	r2 += M4(5.903e-02, -6.356e-02, 2.497e-02, 7.815e-02, 3.245e-01, -3.703e-01, 8.134e-02, 6.997e-02, -1.309e-01, 1.386e-01, 6.380e-04, -4.627e-02, -4.336e-01, 2.235e-01, -5.654e-02, 1.895e-02) * s1_2_2;
	r3 += M4(1.364e-01, 1.130e-01, 6.799e-02, -2.238e-01, -3.017e-02, 9.494e-02, -1.572e-01, -1.491e-01, -1.710e-01, -6.511e-02, 1.069e-01, 1.189e-01, -1.199e-02, -5.726e-02, 1.901e-01, 6.369e-02) * s1_2_2;
	r0 += V4(9.426e-04, 2.051e-02, -3.443e-02, 4.614e-02);
	r0 = clamp(r0, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 += V4(2.640e-02, -4.707e-03, 2.231e-02, -1.792e-02);
	r1 = clamp(r1, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 += V4(1.242e-02, -1.125e-02, -2.571e-02, -4.941e-02);
	r2 = clamp(r2, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 1), vec4(r2));
	r3 += V4(-1.575e-02, -2.705e-02, -6.058e-03, -1.294e-02);
	r3 = clamp(r3, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 1), vec4(r3));
}

//!DESC [CuNNy_4x16_DS] -conv3
//!HOOK LUMA
//!COMPUTE 16 16 8 8
//!BIND conv2
//!BIND LUMA
//!SAVE conv3
//!WIDTH LUMA.w 2 *
//!HEIGHT LUMA.h 2 *
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
// Select the arithmetic types used by this pass: the 16-bit float vector,
// matrix and scalar types when the float16 extension macro is defined,
// otherwise the standard 32-bit GLSL types.
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// Workgroup-shared tile of conv2 samples, indexed [plane][row][column].
// hook() fills it before its barrier(): planes 0, 1, 2 and 3 receive the
// .w, .z, .x and .y components of the four conv2_gather() results.
// After the barrier each invocation reads a 3x3 window at rows xy.y+0..2 and
// columns xy.x+0..2, so a plane is 10x10: the 8x8 block of positions covered
// by one workgroup plus a one-texel border on every side.
shared V4 G[4][10][10];
void hook() {
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	ivec2 opos = pos * ivec2(2, 2);
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			vec2 p;
			p = vec2(clamp(pos + ivec2(x - 1, y - 1), ivec2(0), sz) * ivec2(2, 2) + ivec2(1, 1)) * conv2_pt;
			V4 sr0 = V4(conv2_gather(p, 0));
			V4 sg0 = V4(conv2_gather(p, 1));
			V4 sb0 = V4(conv2_gather(p, 2));
			V4 sa0 = V4(conv2_gather(p, 3));
			G[0][ay][ax] = V4(sr0.w, sg0.w, sb0.w, sa0.w);
			G[1][ay][ax] = V4(sr0.z, sg0.z, sb0.z, sa0.z);
			G[2][ay][ax] = V4(sr0.x, sg0.x, sb0.x, sa0.x);
			G[3][ay][ax] = V4(sr0.y, sg0.y, sb0.y, sa0.y);
		}
	}
	barrier();
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	V4 r0, r1, r2, r3;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0); r3 = V4(0.0);
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	r0 += M4(-4.029e-02, -2.026e-02, 5.404e-02, 1.832e-01, 1.480e-01, 4.882e-02, -9.512e-02, -5.824e-02, 2.489e-02, -1.189e-02, 2.082e-02, 9.979e-02, 1.415e-01, 6.782e-02, 3.209e-02, -3.330e-02) * s0_0_0;
	r1 += M4(2.238e-02, 1.277e-02, 1.689e-02, -3.643e-03, 1.111e-01, -3.794e-02, 6.370e-02, -6.816e-02, -7.774e-02, 2.496e-02, -3.274e-02, 2.760e-02, 6.537e-02, -6.221e-02, -2.967e-02, -8.326e-03) * s0_0_0;
	r2 += M4(9.053e-02, 9.208e-02, 1.046e-01, 1.475e-02, -2.190e-01, -4.123e-02, 2.741e-01, 8.917e-02, 1.199e-01, 1.241e-01, -1.022e-01, -4.432e-02, -2.550e-01, -1.486e-01, -2.181e-01, -8.754e-04) * s0_0_0;
	r3 += M4(9.672e-03, 1.065e-01, 6.405e-02, -7.072e-03, -1.404e-02, -2.460e-02, -4.112e-02, 1.543e-02, -5.237e-03, 1.072e-02, -3.822e-02, -4.833e-02, 6.636e-03, -5.556e-02, -6.071e-02, 3.457e-02) * s0_0_0;
	r0 += M4(5.929e-02, 1.863e-02, -2.404e-02, -3.014e-02, 4.804e-01, 2.673e-01, 2.697e-01, 2.073e-01, -6.873e-02, -2.280e-01, -2.313e-01, -1.114e-01, -2.484e-01, -7.567e-02, -5.507e-02, 1.112e-01) * s0_0_1;
	r1 += M4(3.177e-02, 2.423e-01, 3.773e-02, -2.849e-02, -2.170e-02, -1.506e-01, 6.030e-02, 3.988e-02, 4.007e-02, -1.974e-01, -3.156e-01, 3.332e-03, -4.275e-02, -5.254e-01, -4.342e-01, -1.387e-02) * s0_0_1;
	r2 += M4(-1.548e-01, -5.031e-02, -1.379e-01, -1.529e-02, -7.198e-03, 1.748e-01, -7.290e-02, 3.642e-01, 2.378e-01, 1.726e-01, -6.855e-02, -1.376e-01, 2.948e-01, 1.351e-01, 1.972e-01, 5.400e-02) * s0_0_1;
	r3 += M4(6.331e-02, -2.081e-03, -1.028e-01, -9.409e-02, -1.748e-01, 4.530e-01, 3.709e-01, -3.054e-01, 9.965e-02, -6.789e-02, -3.272e-01, 8.281e-02, 7.855e-02, 1.050e-01, 7.824e-02, -7.625e-03) * s0_0_1;
	r0 += M4(1.227e-01, -4.099e-03, 5.586e-02, 1.685e-01, -4.871e-02, -2.596e-02, 1.593e-02, 3.273e-01, 1.780e-01, -3.028e-02, 1.652e-01, -3.561e-01, -1.404e-01, -1.686e-02, -4.041e-02, -3.088e-01) * s0_0_2;
	r1 += M4(1.395e-02, 3.883e-02, 1.155e-01, -2.729e-03, 1.747e-02, 6.240e-02, 3.324e-03, -6.663e-03, 3.021e-02, -1.134e-01, -1.750e-01, 1.225e-02, -4.375e-02, -9.331e-02, -2.313e-02, -1.038e-01) * s0_0_2;
	r2 += M4(9.248e-02, 2.973e-03, 2.816e-01, 7.929e-02, 1.296e-01, -6.697e-04, 3.625e-01, 3.650e-02, -7.663e-02, -1.326e-01, -2.393e-01, -1.386e-02, -1.325e-01, -9.862e-02, -4.800e-02, 9.239e-02) * s0_0_2;
	r3 += M4(3.062e-02, 2.959e-01, 5.988e-02, -1.247e-02, 1.015e-01, 3.077e-01, 3.058e-02, -7.810e-03, -6.121e-02, -9.959e-02, 4.131e-02, 3.110e-03, -4.842e-02, -4.175e-01, -2.340e-01, -1.959e-03) * s0_0_2;
	r0 += M4(6.450e-02, 2.813e-03, 3.777e-02, -3.201e-02, -8.233e-02, -2.676e-03, 2.410e-01, 8.889e-02, -4.406e-02, -1.220e-02, 7.338e-02, 1.060e-01, 6.172e-02, 2.081e-02, -6.087e-02, -1.813e-02) * s0_1_0;
	r1 += M4(-7.242e-02, -1.036e-01, 4.537e-02, -5.490e-02, -1.647e-02, -1.220e-01, -7.951e-02, 8.644e-02, 1.400e-02, -1.281e-01, -1.562e-01, 4.319e-02, 1.255e-01, 1.169e-01, 2.889e-02, 2.938e-02) * s0_1_0;
	r2 += M4(4.964e-02, 8.397e-02, -4.175e-01, 9.653e-02, 1.543e-01, 1.088e-01, -2.751e-01, -7.775e-02, 4.849e-02, 9.205e-03, 2.700e-01, -5.054e-02, -2.292e-01, -1.293e-01, 5.774e-02, -2.853e-02) * s0_1_0;
	r3 += M4(-9.274e-02, 1.225e-02, 1.271e-01, -7.952e-02, -5.965e-02, 7.646e-02, 2.116e-01, -5.290e-02, 1.305e-02, 3.071e-02, 2.000e-01, -1.189e-01, -2.013e-02, 1.864e-02, -7.031e-02, 5.848e-02) * s0_1_0;
	r0 += M4(-1.149e-01, 3.847e-03, -1.893e-01, -1.654e-01, -1.526e-01, 3.291e-01, -9.112e-02, -1.864e-01, -6.010e-02, -1.023e-01, -1.751e-01, -4.714e-02, -2.262e-01, -3.670e-02, 1.764e-01, 1.150e-01) * s0_1_1;
	r1 += M4(-8.937e-02, -4.528e-02, 9.106e-02, -2.679e-02, 3.190e-02, 1.795e-01, 6.561e-02, 3.946e-01, 1.990e-01, -6.592e-01, -4.230e-01, 9.777e-02, -4.359e-02, 3.245e-02, -1.997e-01, -1.317e-01) * s0_1_1;
	r2 += M4(1.323e-01, 1.353e-01, -1.715e-01, 6.161e-02, 2.463e-01, -1.117e-01, -6.945e-01, -2.380e-01, -4.026e-01, -5.408e-02, -7.610e-01, -1.757e-01, -7.335e-02, -1.158e-01, 7.664e-01, -2.806e-03) * s0_1_1;
	r3 += M4(-1.170e-01, -1.062e-01, -2.287e-01, -4.047e-02, 9.248e-03, -1.648e-01, 7.097e-02, -1.829e-02, 2.998e-01, 1.299e-01, -3.870e-01, 1.397e-01, -2.634e-02, 7.024e-02, 3.233e-01, -9.263e-02) * s0_1_1;
	r0 += M4(-2.167e-02, -5.259e-02, -1.091e-01, 1.557e-01, 7.148e-02, 8.718e-02, 8.287e-02, 8.238e-03, 1.926e-01, -1.293e-01, -6.165e-02, -2.397e-01, -1.638e-01, -5.600e-02, 2.526e-02, -3.871e-02) * s0_1_2;
	r1 += M4(-2.434e-02, 2.212e-01, 3.329e-02, 8.425e-03, -2.982e-02, 1.978e-01, 2.872e-02, 1.068e-01, 1.231e-01, -1.068e-01, -3.379e-02, 4.517e-02, 1.049e-02, 6.312e-03, -1.208e-01, 5.657e-02) * s0_1_2;
	r2 += M4(1.410e-01, -1.073e-02, 8.074e-02, 6.614e-02, -1.648e-01, -1.288e-01, 2.520e-01, 2.075e-02, -1.821e-01, 6.615e-02, -1.929e-01, -2.770e-01, -3.799e-01, -1.341e-01, -2.238e-01, -1.900e-01) * s0_1_2;
	r3 += M4(-1.759e-02, -1.127e-01, -1.278e-01, -5.150e-02, 6.136e-02, -2.515e-02, 1.372e-01, 1.211e-02, -2.933e-04, 3.621e-01, -9.811e-02, 6.467e-02, -9.601e-02, -4.894e-02, 2.405e-02, -5.742e-02) * s0_1_2;
	r0 += M4(1.045e-03, -2.151e-02, 1.269e-01, 4.918e-02, 7.900e-03, 3.484e-02, -6.067e-02, -1.490e-03, -5.750e-02, -7.934e-03, 4.513e-02, -3.048e-02, -6.764e-02, -4.888e-02, 3.994e-02, 4.987e-02) * s0_2_0;
	r1 += M4(-5.830e-02, 5.857e-02, -3.958e-02, -1.617e-03, -2.130e-02, 2.093e-02, 2.266e-02, 5.968e-02, 2.840e-02, -8.061e-02, -5.834e-02, 4.220e-02, 3.960e-02, -8.712e-02, -8.045e-02, 1.774e-02) * s0_2_0;
	r2 += M4(6.642e-02, 6.728e-02, -3.074e-01, 6.394e-03, 2.126e-02, 1.479e-02, 1.841e-02, -4.387e-03, -3.206e-02, -8.092e-02, 8.131e-03, -6.575e-02, -4.828e-02, -1.550e-02, 1.675e-01, 2.426e-02) * s0_2_0;
	r3 += M4(-3.602e-02, 4.663e-02, 1.827e-01, -2.550e-02, -2.553e-02, 1.435e-02, -1.255e-01, 1.404e-02, 5.719e-02, -1.523e-02, 1.057e-01, -7.645e-03, 3.951e-02, 5.478e-02, 1.136e-01, 1.562e-02) * s0_2_0;
	r0 += M4(-1.099e-01, -3.541e-02, 1.102e-01, 2.090e-03, -9.231e-02, -7.396e-02, 8.962e-02, -6.566e-02, -5.404e-02, -3.041e-02, 1.754e-02, 3.821e-02, 4.613e-03, 1.549e-02, -7.648e-02, -1.421e-02) * s0_2_1;
	r1 += M4(-1.528e-02, 1.470e-01, -1.733e-02, -1.937e-02, 3.057e-02, 4.880e-02, -9.272e-02, 3.797e-02, 6.350e-02, 5.852e-02, -1.219e-01, -4.239e-03, 6.260e-02, -2.608e-01, -5.658e-02, 3.840e-02) * s0_2_1;
	r2 += M4(-4.257e-02, 7.251e-02, -1.577e-01, -1.799e-02, -2.193e-02, 6.201e-02, -1.576e-01, -7.112e-02, -1.882e-01, -5.063e-02, 4.428e-01, -1.020e-01, -3.799e-02, -1.221e-01, 8.921e-02, -2.521e-02) * s0_2_1;
	r3 += M4(-8.085e-02, -7.789e-02, 1.772e-02, -1.411e-01, 2.373e-02, -2.142e-02, 8.677e-02, -5.279e-02, 1.775e-02, 2.826e-02, -5.126e-02, 1.248e-02, 2.256e-02, 1.778e-02, -1.194e-01, 1.310e-02) * s0_2_1;
	r0 += M4(-1.284e-02, -2.542e-02, 7.307e-02, 1.847e-01, -3.358e-02, 4.239e-02, 5.716e-02, 9.232e-02, 1.645e-01, 7.319e-02, -1.346e-02, -5.514e-03, 1.267e-01, 9.044e-02, -3.330e-02, -5.370e-02) * s0_2_2;
	r1 += M4(2.348e-03, -7.080e-02, -2.478e-01, -5.322e-02, -3.921e-02, -9.312e-02, -2.413e-02, 3.131e-02, 1.390e-03, -1.674e-02, -1.285e-01, 1.048e-02, 3.056e-03, -2.904e-01, 4.221e-02, 8.153e-03) * s0_2_2;
	r2 += M4(-1.499e-02, 3.967e-02, 1.963e-01, 5.822e-02, 2.904e-02, -6.479e-02, -8.806e-03, -8.188e-03, 3.046e-02, 5.532e-02, 7.481e-02, -1.778e-02, -1.696e-01, -1.639e-01, -1.139e-01, -3.811e-03) * s0_2_2;
	r3 += M4(3.685e-02, 1.287e-01, 1.733e-01, -5.705e-02, -3.869e-02, 1.459e-02, 7.668e-02, -5.364e-02, -1.279e-02, 8.198e-02, -1.024e-01, 1.678e-02, -1.262e-02, 9.070e-03, -6.416e-02, -2.403e-02) * s0_2_2;
	r0 += M4(-5.819e-02, -4.076e-03, -8.433e-02, -5.011e-02, -1.112e-01, -9.105e-02, -1.116e-01, 4.038e-02, -9.691e-03, -2.858e-02, 2.595e-02, 2.521e-02, 1.910e-01, 5.204e-02, 9.123e-02, 1.697e-02) * s1_0_0;
	r1 += M4(-5.398e-03, -1.286e-01, -3.994e-02, -7.114e-03, -3.111e-02, 1.028e-01, -6.869e-02, 5.493e-03, 3.756e-02, -7.155e-02, 3.995e-02, -3.134e-02, 6.137e-02, 4.781e-02, 1.419e-02, 2.679e-02) * s1_0_0;
	r2 += M4(-7.245e-02, -3.477e-02, -7.956e-02, -8.514e-02, 6.989e-02, -9.942e-02, -1.974e-03, 1.642e-01, 5.917e-02, -7.433e-02, 1.514e-01, 8.305e-02, 6.521e-02, 6.244e-02, 7.475e-02, -2.319e-02) * s1_0_0;
	r3 += M4(-2.601e-02, 4.494e-02, 9.831e-02, -4.480e-02, 3.545e-02, -8.653e-02, -1.302e-01, 5.213e-02, 1.367e-02, 5.133e-02, 9.942e-02, 1.323e-01, 9.313e-03, 1.768e-03, -3.764e-02, -4.018e-02) * s1_0_0;
	r0 += M4(-5.214e-02, 3.729e-02, -1.362e-01, 1.157e-03, -2.824e-01, -2.250e-02, -4.476e-02, 2.781e-01, 2.438e-01, 2.666e-01, 1.585e-01, 2.402e-01, 3.903e-01, 1.636e-02, 8.126e-02, -5.847e-03) * s1_0_1;
	r1 += M4(-2.525e-03, -3.059e-02, 3.314e-02, 3.262e-03, -7.783e-02, 5.589e-02, 1.900e-01, 4.007e-02, 9.159e-02, 2.292e-01, 5.101e-02, -5.856e-03, 1.130e-01, 9.574e-02, -1.528e-01, 6.365e-04) * s1_0_1;
	r2 += M4(-2.669e-03, 8.402e-02, -6.561e-02, -6.795e-02, -8.127e-02, -2.450e-01, -1.059e-01, 9.547e-03, -1.942e-01, -9.598e-02, 6.314e-02, 1.264e-01, 2.241e-02, 2.128e-01, 1.912e-02, 4.740e-02) * s1_0_1;
	r3 += M4(9.858e-02, -5.673e-02, 2.304e-02, 1.314e-01, -7.664e-02, -4.169e-01, -1.645e-01, 4.350e-02, -4.974e-02, 9.835e-02, 1.694e-01, 6.881e-02, 1.436e-01, 5.119e-02, -5.338e-02, -5.138e-02) * s1_0_1;
	r0 += M4(-9.029e-02, -5.606e-02, -1.002e-01, -5.941e-02, -2.010e-01, -3.766e-02, -3.953e-02, -3.901e-02, 6.989e-02, 7.598e-02, 3.203e-02, 1.038e-01, 3.213e-01, 1.165e-01, 8.754e-02, -6.684e-03) * s1_0_2;
	r1 += M4(6.661e-03, 2.147e-02, 4.525e-02, 6.572e-03, -3.693e-02, -7.474e-03, -2.749e-02, 4.241e-03, 1.933e-03, -4.962e-02, -1.631e-02, -3.777e-03, 3.667e-02, -5.429e-02, -2.300e-01, 2.167e-02) * s1_0_2;
	r2 += M4(-2.029e-03, 1.327e-02, 1.069e-02, -4.761e-03, -1.413e-02, -1.293e-01, -4.473e-02, -3.476e-02, 2.181e-02, 1.381e-01, 4.454e-02, -9.999e-02, 1.020e-01, 1.330e-01, 1.431e-01, 1.145e-01) * s1_0_2;
	r3 += M4(4.867e-03, -1.359e-03, -8.367e-02, 1.854e-02, -2.368e-02, -2.427e-01, -5.011e-02, 3.522e-02, 1.072e-01, 2.812e-01, 2.047e-01, -1.967e-02, 1.802e-02, 3.092e-01, 1.076e-01, -3.159e-02) * s1_0_2;
	r0 += M4(-2.981e-02, -2.814e-02, -2.627e-01, 4.642e-02, -5.469e-01, -1.460e-01, -3.935e-02, -1.209e-01, 1.486e-01, 4.255e-02, -2.435e-02, -8.916e-02, 1.136e-01, 9.643e-02, -7.505e-02, 2.874e-02) * s1_1_0;
	r1 += M4(7.057e-02, 9.782e-03, -3.048e-03, 1.122e-02, 3.473e-01, 5.801e-02, 2.083e-02, 4.889e-02, 2.135e-01, 1.179e-01, 3.981e-02, -1.077e-02, 8.716e-03, -2.917e-01, -5.489e-02, -6.382e-02) * s1_1_0;
	r2 += M4(4.518e-02, 8.324e-02, 1.070e-01, -1.733e-01, -2.722e-01, -2.403e-01, -1.992e-01, -2.471e-02, -1.783e-01, -1.264e-01, -1.964e-01, 7.302e-03, -2.074e-01, -4.299e-02, -1.000e+00, 1.909e-01) * s1_1_0;
	r3 += M4(1.184e-01, 9.761e-02, 5.266e-02, 8.413e-02, -1.460e-01, 1.010e-01, -2.864e-01, -3.076e-01, -7.366e-02, -3.868e-02, -5.329e-02, -1.522e-01, -1.117e-01, 5.265e-02, -1.831e-01, -9.253e-02) * s1_1_0;
	r0 += M4(-1.115e-01, 3.132e-01, -8.895e-02, 6.307e-02, -3.872e-01, 2.647e-01, 5.059e-01, -1.782e-01, 3.378e-01, -4.358e-01, -1.996e-01, -3.864e-02, 3.164e-02, 3.347e-01, -1.204e-01, -8.238e-02) * s1_1_1;
	r1 += M4(-1.870e-01, 1.718e-01, 2.693e-02, -8.595e-02, -1.481e-01, 1.830e-01, 7.076e-02, -1.634e-01, -5.072e-02, -2.277e-01, 7.412e-02, -8.699e-02, 1.214e-01, -3.149e-02, -1.474e-01, -4.511e-03) * s1_1_1;
	r2 += M4(1.184e-01, 5.915e-02, 1.576e-01, 1.023e-01, -7.789e-02, 2.094e-01, -3.549e-01, 4.561e-01, -3.798e-02, -1.016e-02, 3.511e-01, -2.024e-01, -1.454e-02, -2.301e-01, -3.361e-02, -1.909e-01) * s1_1_1;
	r3 += M4(4.116e-01, -4.173e-01, 2.891e-01, -1.363e-01, -8.812e-01, -6.398e-01, -1.266e-01, -2.842e-01, -1.144e-01, -3.228e-01, -2.478e-01, -1.388e-01, 9.462e-02, 2.860e-02, -1.715e-01, 4.681e-02) * s1_1_1;
	r0 += M4(-1.054e-02, -4.966e-02, -4.006e-02, 4.110e-02, -2.008e-01, -9.803e-02, -2.149e-02, 2.109e-01, 6.962e-02, 2.881e-01, 1.231e-01, 1.996e-01, 4.873e-01, 4.176e-02, 4.551e-02, -1.331e-01) * s1_1_2;
	r1 += M4(9.114e-03, 3.186e-02, 6.859e-03, 7.183e-03, 2.211e-04, 8.834e-02, 1.083e-02, -4.325e-02, -8.683e-02, -1.232e-01, 1.415e-02, -3.508e-02, 9.227e-02, -3.543e-01, -1.239e-01, 4.072e-02) * s1_1_2;
	r2 += M4(2.679e-02, -9.060e-03, 1.579e-01, -7.952e-02, 9.668e-02, -1.056e-01, 2.484e-01, -1.569e-02, 1.594e-01, 1.889e-02, -1.004e-01, 6.586e-02, 1.322e-02, 1.446e-01, 2.629e-01, 1.372e-01) * s1_1_2;
	r3 += M4(1.015e-01, 2.446e-02, 3.704e-02, -3.458e-03, 1.547e-01, -1.467e-02, -7.734e-03, 2.728e-03, 7.234e-02, -1.141e-01, 5.781e-02, -1.939e-01, -1.342e-02, 4.306e-01, -2.264e-02, 1.631e-03) * s1_1_2;
	r0 += M4(-1.045e-01, -6.681e-03, -3.628e-03, 3.084e-02, -2.125e-01, -7.817e-02, -1.597e-01, -8.721e-02, 5.211e-02, 1.370e-01, -5.758e-02, 2.207e-02, -1.335e-01, -3.590e-02, -7.103e-02, -2.782e-03) * s1_2_0;
	r1 += M4(-8.131e-03, 1.808e-02, 7.885e-03, 2.984e-02, -1.589e-02, -2.224e-01, 2.854e-02, 1.020e-01, -7.881e-02, -2.955e-02, 5.351e-02, 1.013e-01, -2.241e-02, 3.673e-02, -8.898e-02, -3.080e-02) * s1_2_0;
	r2 += M4(2.190e-02, -1.169e-01, -3.597e-01, -3.154e-01, 4.040e-02, -4.610e-03, 4.107e-01, 5.224e-02, 8.120e-02, 1.208e-02, -2.768e-01, 5.596e-02, 1.993e-02, 9.319e-02, -3.635e-01, 7.585e-02) * s1_2_0;
	r3 += M4(4.506e-02, 3.648e-02, -7.201e-02, 2.188e-02, 1.678e-02, 3.221e-02, -2.465e-02, -1.783e-02, -1.074e-01, -2.452e-02, -1.417e-01, -2.679e-02, 4.053e-03, -4.186e-02, 7.671e-02, 1.752e-02) * s1_2_0;
	r0 += M4(-3.712e-02, 1.325e-01, -4.715e-02, -4.678e-02, -2.193e-01, 1.319e-01, -1.101e-01, 1.875e-02, -1.117e-02, 1.664e-01, 1.290e-01, 6.411e-03, -5.153e-02, 5.306e-02, -2.502e-01, -9.817e-03) * s1_2_1;
	r1 += M4(-1.156e-02, -4.742e-02, -4.350e-02, 1.301e-01, -3.028e-02, -1.582e-01, -1.183e-02, 1.295e-01, -6.307e-02, -1.399e-01, 8.069e-02, 1.769e-01, -8.473e-03, -3.580e-03, -2.782e-02, -1.771e-01) * s1_2_1;
	r2 += M4(2.443e-02, -1.280e-01, 1.578e-01, -3.281e-02, 1.089e-01, -1.344e-01, 3.232e-01, 3.602e-02, 1.527e-01, 3.762e-02, -3.854e-01, -3.906e-02, -1.125e-02, 1.340e-01, -9.336e-02, 1.531e-01) * s1_2_1;
	r3 += M4(1.038e-01, 1.263e-02, 1.858e-02, -1.814e-02, -1.547e-01, 1.291e-02, 1.885e-02, -8.382e-02, -1.304e-01, 1.339e-03, -1.117e-01, -2.072e-02, -5.334e-02, -1.285e-01, -2.379e-01, 1.845e-02) * s1_2_1;
	r0 += M4(-4.989e-02, -7.147e-03, -1.659e-02, -4.715e-02, -1.519e-01, -6.746e-03, -1.125e-01, -1.628e-02, -1.351e-01, 6.067e-02, 6.826e-02, 6.547e-02, 3.545e-01, -5.963e-03, 8.103e-02, -5.262e-02) * s1_2_2;
	r1 += M4(1.166e-02, -1.737e-03, 1.483e-02, 5.913e-02, -2.010e-02, -3.225e-02, 7.496e-02, 1.191e-02, -5.872e-02, 6.489e-02, 6.925e-03, 6.764e-02, 6.831e-02, 9.340e-02, 2.096e-02, -5.299e-02) * s1_2_2;
	r2 += M4(-1.315e-02, -1.655e-01, 2.429e-02, -2.186e-02, -6.744e-02, -4.782e-02, 1.900e-01, -5.194e-02, 5.360e-02, 1.050e-02, -1.091e-01, 2.467e-03, 6.722e-02, 9.447e-02, -1.757e-01, 6.801e-02) * s1_2_2;
	r3 += M4(4.632e-02, 3.699e-02, 1.916e-02, -1.702e-02, 4.843e-02, -1.640e-02, 1.727e-02, 1.450e-02, 3.979e-02, -1.776e-01, 9.508e-02, -7.607e-02, 2.566e-03, 2.424e-02, -2.962e-02, 1.236e-01) * s1_2_2;
	s0_0_0 = G[2][xy.y+0][xy.x+0]; s0_0_1 = G[2][xy.y+0][xy.x+1];
	s0_0_2 = G[2][xy.y+0][xy.x+2]; s0_1_0 = G[2][xy.y+1][xy.x+0];
	s0_1_1 = G[2][xy.y+1][xy.x+1]; s0_1_2 = G[2][xy.y+1][xy.x+2];
	s0_2_0 = G[2][xy.y+2][xy.x+0]; s0_2_1 = G[2][xy.y+2][xy.x+1];
	s0_2_2 = G[2][xy.y+2][xy.x+2]; s1_0_0 = G[3][xy.y+0][xy.x+0];
	s1_0_1 = G[3][xy.y+0][xy.x+1]; s1_0_2 = G[3][xy.y+0][xy.x+2];
	s1_1_0 = G[3][xy.y+1][xy.x+0]; s1_1_1 = G[3][xy.y+1][xy.x+1];
	s1_1_2 = G[3][xy.y+1][xy.x+2]; s1_2_0 = G[3][xy.y+2][xy.x+0];
	s1_2_1 = G[3][xy.y+2][xy.x+1]; s1_2_2 = G[3][xy.y+2][xy.x+2];
	r0 += M4(1.776e-01, 5.548e-02, -1.448e-02, -1.425e-02, 8.587e-02, -2.162e-02, 7.748e-02, -3.170e-03, -4.823e-02, -5.646e-02, -1.547e-01, 4.881e-02, -2.741e-02, -1.334e-02, 3.641e-03, 1.554e-02) * s0_0_0;
	r1 += M4(8.935e-02, 6.413e-02, -1.910e-02, -2.198e-02, 3.267e-02, 1.878e-02, -1.769e-01, -2.207e-02, -8.687e-03, -3.778e-03, 1.320e-02, 2.691e-02, 6.597e-03, 4.156e-02, 9.549e-03, -3.067e-02) * s0_0_0;
	r2 += M4(-1.089e-01, -2.938e-02, -1.486e-01, -6.241e-02, 6.075e-02, 6.453e-02, -4.350e-02, -4.577e-02, 1.871e-01, 1.467e-01, 3.431e-01, -1.735e-02, -2.872e-04, -1.592e-02, -1.392e-01, 5.249e-02) * s0_0_0;
	r3 += M4(2.079e-02, 7.117e-03, -6.329e-02, -3.573e-03, -5.348e-04, 8.079e-02, 8.016e-03, -9.605e-03, 1.140e-01, 5.429e-02, -4.382e-02, -3.895e-03, -2.732e-02, -3.718e-02, -5.311e-03, 2.293e-02) * s0_0_0;
	r0 += M4(2.061e-01, 7.843e-02, 6.791e-02, 9.345e-02, 1.539e-01, -1.139e-02, 6.585e-02, -3.348e-02, 2.266e-01, 2.272e-01, 1.841e-01, -3.140e-01, -2.192e-01, -5.761e-02, -1.027e-01, -4.800e-02) * s0_0_1;
	r1 += M4(1.752e-02, -2.334e-01, 3.595e-02, 3.083e-02, 8.717e-02, 1.238e-01, -1.320e-01, 3.297e-02, 2.034e-02, 2.065e-01, 2.078e-01, 1.243e-01, -3.817e-02, -3.013e-01, -3.546e-02, -1.713e-02) * s0_0_1;
	r2 += M4(-1.923e-01, -1.248e-01, 1.838e-02, 3.872e-02, -4.672e-02, -8.436e-02, -1.262e-01, 4.918e-02, -1.904e-01, -2.264e-01, 7.470e-02, -5.663e-02, 7.221e-02, 9.816e-03, -9.507e-02, 3.866e-02) * s0_0_1;
	r3 += M4(4.895e-02, 2.746e-01, 8.343e-02, 2.874e-02, 1.196e-02, -6.336e-02, -1.395e-02, -1.115e-01, -1.198e-01, 2.104e-02, 1.518e-01, -1.282e-01, 4.624e-02, -2.279e-02, 4.908e-02, 1.105e-01) * s0_0_1;
	r0 += M4(5.770e-02, -2.006e-03, -2.282e-03, 2.883e-02, 1.453e-01, 1.555e-02, -1.554e-02, -9.344e-02, 9.176e-03, -5.743e-02, 2.309e-02, -1.777e-01, -1.577e-01, -2.146e-02, -4.337e-02, 8.363e-02) * s0_0_2;
	r1 += M4(2.206e-02, 1.968e-02, -5.518e-02, 1.656e-02, 4.074e-02, -3.433e-02, -1.130e-01, -1.438e-02, 2.870e-02, -9.012e-02, -2.901e-04, 2.037e-02, -1.278e-02, 9.369e-02, 8.823e-02, -3.348e-02) * s0_0_2;
	r2 += M4(1.240e-02, -3.957e-02, 1.085e-01, 4.176e-02, -3.601e-02, -9.011e-02, 3.395e-02, 2.871e-02, 2.764e-02, 2.453e-02, 2.293e-01, -4.119e-02, -1.862e-02, -8.684e-03, 7.140e-02, 2.200e-02) * s0_0_2;
	r3 += M4(2.049e-02, 5.595e-02, 3.877e-03, 1.520e-02, 1.677e-02, 2.900e-01, -7.794e-02, 1.722e-02, 5.609e-02, 2.571e-01, 1.548e-02, 6.921e-03, -1.000e-02, -9.026e-02, 2.705e-02, -8.242e-04) * s0_0_2;
	r0 += M4(2.130e-01, 8.082e-02, -1.173e-01, 2.892e-02, 2.157e-01, -2.134e-02, 2.081e-02, -8.160e-02, -1.666e-01, -3.672e-02, 1.290e-02, 6.581e-02, -7.273e-02, -2.770e-02, -9.272e-03, 2.580e-02) * s0_1_0;
	r1 += M4(7.760e-02, 6.364e-02, 5.845e-02, -1.141e-01, 7.992e-02, 1.499e-01, -1.042e-01, -1.125e-02, -1.513e-02, -1.212e-01, -3.268e-02, 1.843e-03, -1.313e-02, -1.002e-01, 1.233e-01, -9.281e-03) * s0_1_0;
	r2 += M4(-4.782e-02, -1.100e-01, -2.271e-01, 7.961e-02, 1.009e-01, 6.985e-02, 2.252e-01, 7.731e-02, 1.275e-01, 9.068e-02, -1.311e-01, 6.246e-02, -5.100e-02, -1.233e-01, 6.147e-02, -1.163e-01) * s0_1_0;
	r3 += M4(6.115e-02, 3.103e-03, -1.956e-01, 9.027e-02, 8.412e-02, 5.545e-02, 4.198e-02, -6.872e-02, -4.899e-03, -2.380e-02, -9.926e-03, -2.001e-02, 2.956e-02, 3.471e-02, 1.098e-01, 2.149e-02) * s0_1_0;
	r0 += M4(1.741e-01, -5.031e-02, -7.234e-02, 4.895e-02, 4.122e-01, -8.993e-03, 2.911e-02, -6.885e-03, -1.969e-01, 1.903e-01, -3.757e-02, -2.997e-02, -6.220e-01, 3.717e-02, -4.668e-02, -1.198e-01) * s0_1_1;
	r1 += M4(9.418e-02, -4.052e-01, -1.384e-01, -4.506e-02, 1.797e-01, -1.236e-01, -5.339e-01, 3.944e-02, -2.607e-01, 6.058e-02, 1.706e-01, -2.149e-01, 9.145e-02, 7.902e-03, 2.590e-01, 1.372e-02) * s0_1_1;
	r2 += M4(-1.095e-01, -2.589e-01, 4.439e-02, 1.336e-01, -2.487e-01, -7.295e-02, 9.246e-02, 1.634e-01, -3.351e-01, -7.357e-01, -3.642e-01, 1.127e-01, 1.091e-01, 3.448e-02, 2.068e-02, -3.270e-01) * s0_1_1;
	r3 += M4(9.949e-02, 2.094e-01, -1.838e-01, 5.110e-02, -1.554e-02, 3.960e-01, -1.957e-01, 1.356e-03, -7.611e-02, -1.738e-01, -4.312e-01, 2.254e-01, -1.243e-01, -3.581e-02, 3.769e-01, 1.483e-01) * s0_1_1;
	r0 += M4(1.976e-01, 4.454e-02, -4.213e-02, -4.112e-01, 2.374e-01, 2.072e-02, 7.378e-02, -1.125e-01, -2.619e-01, -2.033e-01, -1.145e-01, -3.493e-01, -1.796e-01, 1.577e-01, -1.521e-01, 2.029e-01) * s0_1_2;
	r1 += M4(5.244e-02, -2.389e-01, 1.042e-01, 6.047e-02, 2.555e-02, 6.087e-02, -1.620e-01, 3.600e-02, -2.275e-02, 1.590e-01, 3.072e-01, 2.069e-03, -1.119e-01, 3.077e-01, 1.340e-01, -1.033e-01) * s0_1_2;
	r2 += M4(-7.629e-02, -6.826e-02, 9.400e-02, 1.008e-01, -1.204e-01, -3.802e-02, 2.044e-01, 9.956e-02, -2.540e-01, -3.125e-01, -1.928e-01, -3.375e-01, -4.473e-02, 4.984e-01, -7.282e-02, 2.301e-01) * s0_1_2;
	r3 += M4(4.589e-02, 1.097e-01, 1.266e-04, 2.578e-02, 8.500e-02, 2.575e-01, 1.255e-01, 2.830e-02, 6.812e-02, -1.736e-01, -1.412e-01, 1.829e-01, -1.277e-01, -7.045e-02, 2.902e-01, -1.560e-02) * s0_1_2;
	r0 += M4(6.968e-02, -1.497e-02, 1.142e-02, 2.430e-02, 2.288e-01, 1.699e-02, -4.925e-02, 1.984e-02, 3.436e-02, -4.381e-02, 5.190e-02, -1.665e-02, -9.559e-02, -1.546e-02, 4.653e-02, 3.088e-02) * s0_2_0;
	r1 += M4(5.820e-02, 9.941e-02, -5.433e-02, 2.838e-03, 1.211e-01, 1.548e-01, -5.456e-02, -2.572e-02, -7.172e-02, 7.756e-02, -6.531e-02, -9.780e-02, -6.784e-02, -5.695e-03, -1.060e-02, -5.014e-03) * s0_2_0;
	r2 += M4(-3.838e-02, -1.725e-02, 9.240e-02, -3.480e-02, -6.518e-02, -9.414e-02, 3.337e-02, -3.129e-02, -4.203e-03, 7.856e-02, -8.195e-02, -6.157e-02, 1.110e-02, -1.215e-02, 1.129e-01, 7.643e-02) * s0_2_0;
	r3 += M4(8.079e-03, 5.135e-02, 8.884e-02, 2.494e-02, 9.617e-02, -2.998e-05, -1.002e-01, -1.233e-02, -6.544e-02, 8.108e-03, 8.467e-02, -8.881e-02, 5.929e-03, -3.348e-02, 4.510e-02, 2.640e-02) * s0_2_0;
	r0 += M4(1.548e-01, -6.367e-02, -1.860e-01, 1.305e-03, 1.554e-01, -6.323e-02, -1.002e-01, 4.681e-02, -1.636e-01, -1.607e-01, 5.136e-02, 1.416e-01, 3.018e-01, 1.383e-01, 1.991e-01, 6.904e-02) * s0_2_1;
	r1 += M4(1.426e-01, -3.427e-02, -8.333e-02, -2.263e-01, 1.024e-01, -1.194e-02, -1.364e-01, -4.156e-02, -5.778e-02, 4.332e-02, 9.415e-02, -1.143e-01, 3.842e-02, 1.018e-01, 1.046e-02, 1.270e-02) * s0_2_1;
	r2 += M4(-1.977e-01, -2.050e-05, 2.255e-01, 1.108e-01, -1.081e-01, -1.830e-02, 1.226e-01, 1.026e-01, -1.618e-01, 2.101e-02, 2.616e-01, -6.387e-03, 9.468e-02, 1.393e-02, -7.642e-01, -8.871e-02) * s0_2_1;
	r3 += M4(1.046e-01, -9.497e-03, -1.799e-01, 8.788e-02, 9.446e-02, 9.503e-02, -1.359e-01, 8.993e-02, 4.143e-02, -4.745e-02, 1.889e-01, 1.834e-04, -1.919e-01, 3.300e-02, -1.102e-01, 3.328e-02) * s0_2_1;
	r0 += M4(2.581e-01, 6.849e-02, 1.715e-02, 6.992e-02, 2.203e-01, 7.106e-02, 7.133e-02, -4.898e-02, -1.997e-01, -1.797e-01, -4.999e-02, 1.712e-01, 2.193e-01, 9.248e-02, 2.009e-01, -2.017e-01) * s0_2_2;
	r1 += M4(1.701e-02, 3.341e-02, -2.650e-01, -1.311e-01, 2.168e-02, 1.450e-01, -5.854e-02, 2.566e-02, -3.173e-02, 1.658e-01, 6.633e-02, -1.370e-02, 3.022e-02, -2.670e-01, 1.459e-02, -1.755e-01) * s0_2_2;
	r2 += M4(1.836e-01, 2.640e-01, -1.315e-01, 8.615e-02, 2.312e-02, 2.152e-02, -1.027e-02, 1.082e-01, -7.959e-02, -6.076e-02, -9.768e-02, -2.419e-01, -2.002e-02, 1.588e-01, -2.628e-02, 2.822e-01) * s0_2_2;
	r3 += M4(-3.409e-02, 2.134e-01, 1.409e-02, 3.391e-02, 3.586e-02, 1.382e-01, 5.088e-02, 3.746e-03, -3.612e-02, -6.542e-02, 1.196e-01, -1.864e-02, -1.969e-01, 1.202e-01, -1.161e-01, 1.549e-02) * s0_2_2;
	r0 += M4(-1.258e-03, -2.885e-03, 8.655e-02, -1.300e-01, 2.863e-02, -5.482e-03, -1.004e-02, -5.140e-02, -7.709e-02, -5.569e-02, -1.284e-01, -2.804e-02, 2.306e-02, 4.561e-03, 1.274e-01, 4.608e-03) * s1_0_0;
	r1 += M4(1.047e-01, 3.010e-02, 9.832e-02, -4.412e-02, 7.233e-02, 3.382e-02, -1.361e-02, -1.666e-02, 1.700e-02, 5.239e-02, 1.584e-02, 1.087e-02, -5.290e-02, 7.881e-02, -3.796e-02, 2.042e-02) * s1_0_0;
	r2 += M4(-2.049e-01, -1.195e-01, 4.415e-02, 2.649e-02, -8.365e-02, -2.511e-02, 1.725e-01, -5.461e-02, -1.242e-01, -1.169e-01, -3.440e-02, -1.684e-01, 2.106e-03, -4.067e-02, -1.360e-01, 5.287e-02) * s1_0_0;
	r3 += M4(2.896e-02, 3.447e-02, 1.628e-01, -5.749e-02, 1.252e-02, -9.426e-03, -1.730e-02, 6.089e-02, 7.452e-02, -9.321e-02, -1.347e-01, -9.685e-03, 1.248e-02, -2.533e-02, 1.199e-01, 3.889e-02) * s1_0_0;
	r0 += M4(-2.496e-01, 2.365e-02, -1.152e-01, -1.729e-01, 7.930e-02, 4.484e-02, -6.276e-02, -3.259e-02, -3.626e-03, -6.907e-02, 3.446e-03, -2.233e-01, 1.267e-01, 4.387e-02, 2.089e-01, 1.882e-01) * s1_0_1;
	r1 += M4(-1.597e-01, -9.884e-02, -1.434e-02, -5.963e-02, 8.984e-02, -1.272e-01, -8.431e-02, -3.639e-02, -4.114e-02, 5.917e-02, -1.729e-02, 2.065e-02, 6.577e-02, -8.445e-03, -5.051e-02, -9.482e-03) * s1_0_1;
	r2 += M4(1.805e-01, 2.026e-01, -2.688e-01, 1.334e-01, 8.325e-02, 2.214e-01, -1.105e-01, 3.473e-02, -6.493e-02, -1.074e-01, -1.236e-01, -1.114e-01, -7.946e-03, -1.794e-01, 1.333e-01, 1.037e-01) * s1_0_1;
	r3 += M4(-9.989e-02, 2.939e-01, -8.487e-02, -1.354e-01, -7.567e-02, 1.584e-01, -1.427e-01, 6.669e-02, -7.450e-02, 1.335e-01, -1.123e-01, 4.203e-02, -3.936e-03, -6.716e-02, 1.812e-01, -3.009e-02) * s1_0_1;
	r0 += M4(-1.686e-01, -6.123e-02, 3.602e-02, 1.461e-01, -5.105e-02, 9.506e-02, -1.641e-01, -1.547e-01, -8.177e-02, -6.012e-02, -9.010e-02, -1.950e-01, 4.684e-02, 9.009e-02, 2.872e-02, 4.639e-01) * s1_0_2;
	r1 += M4(-1.029e-02, 1.290e-01, 9.954e-02, -3.398e-02, 1.118e-02, -3.055e-02, -7.373e-03, -6.726e-02, -5.329e-03, 4.847e-02, 6.700e-02, -1.323e-02, -5.271e-03, 4.171e-02, 1.036e-03, 6.116e-02) * s1_0_2;
	r2 += M4(3.324e-02, 1.118e-02, 3.274e-02, -6.541e-02, 7.724e-02, 2.232e-01, 8.869e-03, 4.092e-02, -1.722e-04, 1.836e-02, 6.580e-02, -6.547e-03, -3.513e-03, -7.257e-02, -2.837e-05, 1.688e-02) * s1_0_2;
	r3 += M4(-2.882e-02, -4.385e-02, -1.253e-01, -2.476e-02, 2.256e-02, -1.063e-02, -4.259e-02, 1.103e-01, 2.570e-03, -7.195e-02, -9.257e-02, -3.270e-02, 1.310e-01, 2.299e-01, 2.299e-01, 4.499e-02) * s1_0_2;
	r0 += M4(3.902e-01, 2.745e-01, 1.824e-01, 1.091e-02, 1.016e-01, 7.559e-02, -3.195e-02, -6.818e-03, -2.989e-01, -1.817e-02, -6.623e-02, -8.264e-03, 8.627e-02, 8.958e-02, 1.693e-01, -9.932e-02) * s1_1_0;
	r1 += M4(-3.173e-01, 1.588e-01, 1.745e-01, -1.222e-01, -4.672e-02, 4.668e-03, -6.456e-02, -1.814e-02, -4.261e-01, -2.829e-01, -2.293e-01, -1.134e-01, -6.222e-02, -1.424e-01, 1.041e-01, 8.807e-02) * s1_1_0;
	r2 += M4(7.358e-02, 1.714e-01, -6.397e-01, 5.555e-02, 7.290e-02, 1.308e-01, 3.020e-01, -1.501e-02, 1.821e-01, 3.003e-01, -1.674e-01, 2.713e-01, 1.863e-02, -2.752e-03, -6.493e-01, -8.889e-02) * s1_1_0;
	r3 += M4(-8.270e-02, 3.184e-01, -3.765e-02, 5.355e-02, -1.692e-02, 9.502e-02, -3.901e-02, 6.168e-02, -1.361e-01, 4.460e-02, -2.116e-01, 1.182e-01, -5.341e-02, -4.849e-02, 3.750e-02, -1.061e-01) * s1_1_0;
	r0 += M4(-3.066e-01, 4.321e-01, -1.744e-01, 4.508e-01, 1.960e-01, 1.810e-01, -2.072e-01, -2.135e-01, -6.818e-01, -2.743e-01, -2.012e-01, -4.428e-01, -2.586e-01, -2.342e-01, -7.832e-02, 5.184e-01) * s1_1_1;
	r1 += M4(-9.700e-02, 4.675e-02, 2.446e-01, 5.362e-02, 2.871e-01, 2.954e-01, 5.943e-01, 3.324e-02, -2.349e-02, 5.748e-02, 7.206e-02, -1.963e-02, 2.318e-01, -2.250e-02, 4.871e-02, 4.300e-02) * s1_1_1;
	r2 += M4(5.723e-01, -1.926e-01, 6.504e-02, 9.580e-03, -1.260e-01, -1.997e-01, 1.715e-01, -3.361e-01, 3.135e-02, 3.308e-02, 3.008e-01, -7.467e-02, -6.768e-02, -4.150e-02, -3.775e-01, -1.086e-01) * s1_1_1;
	r3 += M4(5.133e-01, 1.530e-01, 5.410e-01, -2.696e-01, 6.350e-02, -6.821e-02, 3.589e-02, 5.894e-01, 1.263e-01, -1.000e+00, 2.573e-02, 1.171e-02, 2.551e-01, -2.397e-01, -6.776e-02, 2.385e-01) * s1_1_1;
	r0 += M4(-2.288e-01, 5.643e-02, -8.455e-02, -5.206e-02, 5.417e-02, 1.997e-01, -2.036e-01, 1.703e-02, -1.939e-01, -3.142e-03, -4.051e-02, -3.503e-02, 6.029e-02, 1.547e-01, 1.656e-01, 2.122e-01) * s1_1_2;
	r1 += M4(-5.987e-02, 1.206e-01, -1.980e-03, -3.775e-02, -5.097e-02, 1.327e-01, 2.737e-02, 5.441e-02, -3.725e-02, 7.058e-02, -5.286e-02, 1.575e-02, -5.544e-02, -1.402e-01, 1.755e-02, -1.401e-02) * s1_1_2;
	r2 += M4(5.477e-02, -6.046e-02, 7.648e-02, 1.783e-01, 1.919e-01, 7.690e-02, 3.916e-01, 5.058e-02, 1.752e-02, 2.240e-03, -1.635e-01, -7.141e-02, -1.741e-02, -9.219e-02, 1.140e-01, 2.267e-01) * s1_1_2;
	r3 += M4(3.876e-02, -1.647e-01, -4.647e-02, -5.266e-02, -3.536e-02, 1.993e-02, -3.708e-02, 9.581e-02, 5.114e-02, -5.620e-02, 2.839e-02, -7.013e-02, -2.575e-02, -6.515e-02, 2.149e-01, -9.057e-02) * s1_1_2;
	r0 += M4(6.134e-02, 3.740e-02, 1.709e-01, 5.620e-02, -4.878e-02, 4.449e-02, 2.787e-02, 4.781e-02, -2.568e-01, -1.702e-02, 7.348e-03, 5.790e-02, 6.279e-02, 1.555e-01, 1.183e-02, -1.343e-02) * s1_2_0;
	r1 += M4(3.357e-02, 2.735e-01, 3.360e-02, -1.781e-03, -8.951e-02, -4.276e-02, 9.428e-03, -6.239e-02, -1.543e-01, 3.524e-02, -1.884e-02, 8.732e-03, 5.768e-02, 7.386e-02, -1.130e-02, 6.503e-02) * s1_2_0;
	r2 += M4(7.195e-02, 4.893e-02, -3.225e-01, 1.101e-01, 1.103e-01, 1.352e-01, 1.146e-01, -1.100e-02, -1.562e-01, -1.190e-01, 3.080e-01, 1.968e-02, 4.986e-02, -1.113e-01, -7.523e-01, 3.770e-02) * s1_2_0;
	r3 += M4(8.356e-03, 9.239e-02, 5.025e-02, 6.757e-02, 3.937e-02, -6.177e-03, -4.043e-02, 8.474e-02, 9.719e-02, -2.565e-02, -5.893e-02, -7.554e-02, -9.630e-02, -9.434e-02, -3.372e-02, -2.248e-03) * s1_2_0;
	r0 += M4(-8.282e-02, 3.862e-02, 1.187e-01, -1.821e-01, 5.732e-02, -1.965e-02, 1.765e-01, -1.950e-02, -2.086e-01, -1.212e-02, -2.026e-01, -5.772e-02, 9.767e-02, 8.396e-02, 3.276e-01, 1.188e-01) * s1_2_1;
	r1 += M4(2.903e-02, 2.603e-01, 1.133e-01, -3.101e-02, 1.922e-02, 1.883e-01, -9.240e-03, -5.040e-02, 2.869e-02, -1.960e-01, 7.913e-02, 1.184e-01, -3.808e-02, 1.488e-01, -3.694e-02, -5.232e-02) * s1_2_1;
	r2 += M4(-5.176e-02, -1.087e-01, -2.861e-01, 9.272e-02, 1.014e-01, 1.065e-01, -4.521e-01, -2.313e-01, -5.083e-02, -4.906e-02, -1.860e-01, -3.425e-01, -2.642e-02, -8.573e-02, -6.618e-01, 1.440e-01) * s1_2_1;
	r3 += M4(6.307e-02, 8.261e-02, 3.137e-01, 2.947e-02, -6.380e-02, -2.066e-02, 2.255e-01, -8.864e-02, -7.923e-02, 3.988e-02, -2.858e-01, 4.271e-02, -4.631e-02, 7.928e-02, 1.997e-01, -1.176e-01) * s1_2_1;
	r0 += M4(-1.460e-01, 3.081e-02, -6.705e-04, -6.745e-02, 3.067e-02, 4.152e-02, -1.786e-02, -7.981e-02, -8.921e-02, 1.465e-02, -9.213e-02, -1.321e-02, -7.773e-02, 1.880e-02, 1.152e-01, 1.919e-01) * s1_2_2;
	r1 += M4(-4.366e-02, 7.444e-02, 2.939e-02, -4.258e-03, 4.348e-02, 3.683e-02, 5.074e-02, 1.182e-02, -1.376e-02, -5.225e-02, 7.006e-02, 5.832e-02, -2.754e-02, -5.874e-02, -7.116e-02, -3.472e-02) * s1_2_2;
	r2 += M4(-1.141e-02, -4.975e-02, 6.597e-02, 1.072e-01, 3.495e-02, -3.012e-02, -1.086e-01, -1.211e-01, 8.777e-02, -3.024e-02, 2.522e-01, -5.733e-03, 3.370e-02, 5.978e-02, 2.520e-02, 1.109e-01) * s1_2_2;
	r3 += M4(-3.565e-02, -5.300e-02, 9.792e-02, -3.841e-02, 1.694e-01, -1.135e-05, 1.178e-01, 2.787e-02, 6.860e-02, 4.946e-02, -6.256e-02, -3.241e-02, -3.636e-02, -5.155e-02, 1.459e-01, -7.911e-02) * s1_2_2;
	r0 += V4(-2.375e-02, 1.036e-02, -2.018e-02, -3.606e-02);
	r0 = clamp(r0, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 += V4(-1.004e-02, -2.417e-02, -1.400e-03, -4.897e-03);
	r1 = clamp(r1, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 += V4(-1.569e-02, -1.748e-02, 1.294e-02, -1.214e-02);
	r2 = clamp(r2, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 1), vec4(r2));
	r3 += V4(2.844e-03, 1.053e-02, -1.975e-02, 2.844e-03);
	r3 = clamp(r3, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 1), vec4(r3));
}

//!DESC [CuNNy_4x16_DS] -conv4
//!HOOK LUMA
//!COMPUTE 16 16 8 8
//!BIND conv3
//!BIND LUMA
//!SAVE conv4
//!WIDTH LUMA.w 2 *
//!HEIGHT LUMA.h 2 *
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Precision selection for this pass: request the explicit float16 arithmetic
// extension and, when the compiler defines its feature macro, alias the
// working types to half precision; otherwise fall back to the standard
// single-precision types.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// Workgroup-shared staging tile filled by hook() before its barrier().
// Four planes, one per component (.w, .z, .x, .y) of the conv3_gather results;
// each plane is 10x10: the 8x8 workgroup footprint plus a one-texel border,
// which hook() then reads back as 3x3 neighbourhoods (offsets +0..+2).
shared V4 G[4][10][10];
void hook() {
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	ivec2 opos = pos * ivec2(2, 2);
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			vec2 p;
			p = vec2(clamp(pos + ivec2(x - 1, y - 1), ivec2(0), sz) * ivec2(2, 2) + ivec2(1, 1)) * conv3_pt;
			V4 sr0 = V4(conv3_gather(p, 0));
			V4 sg0 = V4(conv3_gather(p, 1));
			V4 sb0 = V4(conv3_gather(p, 2));
			V4 sa0 = V4(conv3_gather(p, 3));
			G[0][ay][ax] = V4(sr0.w, sg0.w, sb0.w, sa0.w);
			G[1][ay][ax] = V4(sr0.z, sg0.z, sb0.z, sa0.z);
			G[2][ay][ax] = V4(sr0.x, sg0.x, sb0.x, sa0.x);
			G[3][ay][ax] = V4(sr0.y, sg0.y, sb0.y, sa0.y);
		}
	}
	barrier();
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	V4 r0, r1, r2, r3;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0); r3 = V4(0.0);
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	r0 += M4(1.986e-03, -4.134e-03, -6.762e-02, 1.246e-03, -2.493e-02, -4.165e-02, -1.889e-02, 1.167e-03, 8.175e-02, 1.611e-01, -2.914e-02, 3.468e-02, -3.203e-02, 8.597e-03, 2.795e-02, -7.267e-03) * s0_0_0;
	r1 += M4(2.936e-02, -1.405e-02, -1.949e-02, -5.641e-02, -2.909e-02, -3.595e-03, -4.202e-02, 3.429e-02, 9.594e-02, -3.182e-02, -5.462e-02, -3.179e-02, -1.791e-02, -3.316e-02, 4.560e-03, -1.696e-02) * s0_0_0;
	r2 += M4(-3.333e-02, -1.175e-03, 2.137e-02, 7.770e-03, -4.613e-02, 4.445e-02, -7.006e-02, 1.447e-02, -5.072e-02, -5.352e-02, 1.234e-01, 3.823e-02, -5.142e-02, 3.023e-02, 4.408e-02, -4.900e-02) * s0_0_0;
	r3 += M4(4.277e-04, 1.162e-02, 2.973e-03, 1.677e-02, 6.908e-03, -2.625e-02, 1.219e-02, -2.834e-02, -8.249e-02, 1.091e-01, -4.634e-02, -2.081e-02, 4.880e-02, -2.060e-02, 6.800e-03, 6.378e-02) * s0_0_0;
	r0 += M4(1.627e-01, -9.654e-02, -3.654e-01, 7.616e-03, -2.229e-02, 3.089e-03, 2.485e-03, -2.970e-02, 2.634e-02, 3.505e-01, 1.324e-01, 4.067e-02, -2.937e-03, -2.827e-02, 1.957e-02, 9.268e-04) * s0_0_1;
	r1 += M4(1.007e-02, -8.631e-02, -1.870e-01, -9.745e-02, 2.664e-02, 1.080e-02, -7.551e-03, -3.699e-02, -8.670e-02, 7.543e-02, 9.215e-02, 7.638e-02, 1.502e-02, -3.312e-02, -1.083e-02, -7.296e-04) * s0_0_1;
	r2 += M4(-2.178e-01, -6.858e-02, 6.982e-02, 1.133e-01, 1.055e-02, 2.092e-01, 1.609e-02, -1.079e-01, 2.104e-01, -1.017e-01, 2.194e-01, 5.274e-03, -2.901e-02, -2.729e-02, 1.110e-02, -2.752e-02) * s0_0_1;
	r3 += M4(-1.023e-01, 1.800e-02, -6.703e-02, -9.801e-02, 1.087e-04, 4.014e-02, -3.727e-02, 2.093e-02, 3.590e-02, -1.477e-02, 3.889e-03, -1.401e-02, 1.650e-02, 1.283e-02, 3.435e-02, -1.434e-02) * s0_0_1;
	r0 += M4(2.580e-02, -3.210e-02, 1.047e-04, -3.617e-03, -3.782e-02, -5.677e-02, 4.575e-03, 2.943e-02, 1.003e-01, 1.184e-01, -5.073e-02, 1.475e-02, 2.045e-03, -1.343e-03, -2.023e-02, -6.398e-04) * s0_0_2;
	r1 += M4(2.298e-02, -2.451e-03, 2.655e-02, -2.424e-02, 5.330e-03, 2.531e-02, 6.286e-04, 4.249e-03, 1.706e-02, 4.941e-02, 2.113e-02, -3.079e-03, -1.586e-02, 2.244e-03, -5.901e-03, 1.239e-02) * s0_0_2;
	r2 += M4(-1.161e-01, -3.084e-02, -2.197e-02, 7.168e-02, 1.963e-02, 2.484e-02, -2.620e-02, 1.086e-02, 1.121e-01, 1.849e-02, 2.171e-02, -1.902e-02, 1.037e-03, -1.280e-02, -1.158e-02, -2.574e-02) * s0_0_2;
	r3 += M4(-5.154e-02, -1.765e-02, -5.761e-02, -3.266e-02, 5.165e-03, -1.651e-02, 3.539e-02, -9.516e-03, 1.256e-02, 3.697e-02, 6.515e-02, -6.681e-03, 1.875e-02, -2.642e-02, -9.066e-03, 5.567e-03) * s0_0_2;
	r0 += M4(-7.608e-02, -8.749e-02, 8.919e-02, 3.184e-02, 3.027e-02, 2.700e-02, 4.459e-02, -1.604e-02, -1.261e-01, -6.010e-02, 3.265e-03, 5.475e-02, 1.128e-02, -2.939e-01, -1.277e-01, -3.546e-03) * s0_1_0;
	r1 += M4(-1.268e-01, -4.175e-02, -6.444e-02, -1.448e-01, -2.757e-02, -4.722e-03, -1.485e-03, 1.671e-01, -4.342e-02, 1.242e-02, 1.363e-01, -8.116e-02, -1.075e-01, -2.022e-01, -2.137e-01, -2.074e-01) * s0_1_0;
	r2 += M4(-8.346e-02, -5.578e-02, -1.135e-01, 7.934e-02, 8.708e-02, 2.615e-01, 5.565e-02, -1.253e-01, 2.153e-02, 2.082e-02, -1.091e-01, -5.384e-02, 7.406e-02, -6.361e-02, -4.252e-02, 4.099e-03) * s0_1_0;
	r3 += M4(-6.273e-02, -7.635e-02, 4.912e-02, -6.293e-02, 6.313e-02, -2.153e-02, -1.655e-02, 8.926e-03, -9.220e-02, -1.840e-01, 2.825e-02, 2.610e-02, -7.357e-02, 7.860e-02, 1.218e-01, 3.625e-02) * s0_1_0;
	r0 += M4(-1.458e-01, -3.177e-01, -9.435e-02, -2.672e-01, 1.851e-01, 1.294e-01, 2.406e-01, 8.833e-02, -5.873e-01, -3.103e-01, -2.666e-01, -2.456e-01, 5.504e-03, 6.970e-02, -1.672e-01, 4.867e-03) * s0_1_1;
	r1 += M4(-4.330e-01, -1.564e-01, -5.230e-01, -3.284e-01, -6.475e-02, 1.935e-01, 1.045e-02, -3.580e-03, 2.789e-02, -3.924e-01, -6.389e-01, -1.032e-01, -2.642e-03, 4.807e-02, 9.643e-02, 4.167e-02) * s0_1_1;
	r2 += M4(-4.423e-01, 5.960e-01, -1.386e-01, -4.600e-01, 1.645e-01, -1.000e+00, 2.511e-01, 1.000e+00, -2.887e-01, -2.446e-01, -6.191e-01, 1.966e-01, -4.248e-01, 2.006e-01, 1.637e-02, -5.355e-02) * s0_1_1;
	r3 += M4(-1.479e-01, -2.298e-01, -2.281e-01, -2.895e-01, 1.362e-01, 3.133e-02, 8.719e-02, 9.619e-02, -3.403e-01, -4.111e-01, -8.588e-02, -2.178e-01, -7.822e-02, -2.008e-03, -1.853e-01, -7.504e-02) * s0_1_1;
	r0 += M4(-1.261e-01, -8.479e-02, -7.838e-02, -2.464e-01, 2.224e-02, 4.936e-02, 4.258e-02, 5.150e-02, -8.998e-02, -8.878e-02, -2.170e-01, 1.288e-01, -4.650e-02, 1.035e-02, -4.206e-02, -2.466e-02) * s0_1_2;
	r1 += M4(1.505e-02, -1.021e-01, -6.406e-03, -2.838e-02, 9.287e-03, 1.038e-01, 2.229e-02, -1.259e-02, 6.772e-02, 9.956e-02, 8.413e-02, 5.844e-03, 3.024e-02, 2.122e-02, 1.461e-02, -9.173e-03) * s0_1_2;
	r2 += M4(-2.362e-01, -5.722e-02, -1.530e-01, 1.080e-01, 4.837e-03, 8.347e-02, 3.640e-02, -4.103e-02, 2.319e-01, -9.964e-02, -8.524e-02, 4.220e-02, -1.411e-01, -1.189e-02, -3.724e-02, -9.030e-03) * s0_1_2;
	r3 += M4(-1.136e-01, -6.329e-02, -2.565e-01, -2.099e-01, 9.355e-03, -3.166e-02, 4.824e-02, -4.355e-02, -5.533e-02, 7.100e-02, -1.959e-02, 6.491e-02, 7.398e-04, -2.083e-02, 3.403e-02, 8.184e-03) * s0_1_2;
	r0 += M4(-3.115e-02, -5.227e-02, 3.344e-02, -3.743e-02, -2.233e-02, -1.895e-02, -3.483e-02, -3.044e-03, 4.689e-02, -7.983e-02, 6.664e-02, -6.572e-02, 6.323e-02, 2.156e-01, 7.271e-02, 1.582e-01) * s0_2_0;
	r1 += M4(9.543e-03, -3.601e-02, -2.096e-02, -6.925e-02, -3.295e-02, 6.996e-03, -3.804e-02, 1.924e-02, 3.573e-02, 7.643e-02, -4.281e-02, 5.386e-02, 3.480e-02, 2.931e-02, 1.643e-01, -2.402e-04) * s0_2_0;
	r2 += M4(-7.393e-02, -3.083e-02, -6.470e-02, 5.065e-02, -7.050e-02, 7.009e-02, -2.317e-02, 1.609e-02, -2.567e-02, -1.496e-02, 9.801e-02, -5.112e-02, 2.307e-02, 1.439e-01, 3.895e-02, -1.636e-01) * s0_2_0;
	r3 += M4(6.146e-02, 1.059e-01, -3.367e-02, -2.126e-02, -2.274e-02, 2.201e-02, -1.108e-03, -6.063e-02, -1.781e-02, 2.752e-03, -7.312e-03, 2.016e-02, 9.511e-02, -2.474e-02, 5.672e-02, 1.977e-01) * s0_2_0;
	r0 += M4(8.751e-02, 3.436e-02, 7.214e-02, -6.372e-02, -1.529e-02, 1.489e-02, -9.957e-02, 1.604e-02, 6.418e-03, -4.249e-03, -1.145e-01, 7.004e-02, 6.280e-02, 1.788e-01, 2.013e-01, -2.401e-01) * s0_2_1;
	r1 += M4(-1.300e-01, -9.277e-02, 3.921e-02, -9.084e-02, 6.177e-02, 7.447e-02, 5.617e-02, 3.845e-02, -2.764e-01, -9.604e-02, -2.117e-02, -1.353e-01, 1.213e-01, -4.825e-02, 6.779e-02, -5.612e-02) * s0_2_1;
	r2 += M4(-1.481e-01, -1.608e-02, -7.172e-02, 8.692e-02, -6.487e-03, 1.726e-01, 1.276e-02, -8.668e-02, 7.549e-02, 1.215e-02, -5.088e-03, -2.236e-02, 4.414e-02, -1.028e-01, -7.143e-02, 4.426e-02) * s0_2_1;
	r3 += M4(2.601e-02, -6.061e-02, 4.890e-02, -1.239e-01, 2.001e-02, 7.552e-02, 1.375e-03, 1.069e-01, -6.955e-03, -3.841e-02, -4.347e-04, -2.879e-02, -2.412e-02, -1.670e-01, 4.809e-02, -1.527e-01) * s0_2_1;
	r0 += M4(-1.725e-02, -4.185e-02, -5.408e-03, -4.363e-02, -3.244e-02, -2.150e-02, -4.891e-02, 1.963e-02, 3.354e-02, 2.974e-02, -4.920e-02, -3.485e-02, 4.094e-02, 4.885e-02, 6.664e-02, 5.652e-02) * s0_2_2;
	r1 += M4(-1.939e-03, 4.367e-02, -3.256e-02, -1.529e-02, -5.910e-03, 3.159e-02, 2.323e-03, -1.828e-03, 2.358e-02, 3.041e-02, 1.177e-02, 7.909e-02, -7.461e-03, 3.314e-02, -3.552e-02, 5.747e-03) * s0_2_2;
	r2 += M4(-4.602e-02, -2.651e-03, -1.720e-02, 4.132e-02, -3.737e-02, 1.989e-02, -4.643e-05, -1.136e-02, 1.312e-01, 1.872e-02, 3.857e-02, -4.713e-03, -1.448e-04, -8.854e-04, 1.940e-02, -1.427e-02) * s0_2_2;
	r3 += M4(6.862e-03, -2.008e-02, 1.231e-02, -3.624e-02, 2.840e-02, 4.748e-02, 2.994e-02, 4.502e-03, -3.760e-04, -8.677e-04, -4.524e-02, -5.059e-02, 3.707e-02, 3.687e-02, 4.725e-02, -2.626e-02) * s0_2_2;
	r0 += M4(-3.778e-02, -5.326e-02, 2.551e-02, -1.373e-03, 1.009e-02, -1.050e-03, 9.963e-02, -2.675e-03, -1.550e-02, -1.442e-02, -6.059e-02, -1.542e-02, 1.434e-01, 6.192e-02, 1.974e-01, -3.129e-03) * s1_0_0;
	r1 += M4(-2.125e-02, 3.764e-02, 2.417e-02, 6.012e-02, 1.494e-02, -1.865e-02, -1.871e-02, -4.726e-02, 3.375e-03, 3.033e-02, 3.772e-02, -1.466e-03, -7.627e-02, -3.497e-02, -7.929e-03, 2.682e-02) * s1_0_0;
	r2 += M4(-1.828e-03, 6.123e-02, -3.929e-02, -2.814e-02, 1.719e-02, -5.140e-02, -4.266e-02, 7.696e-02, 7.722e-03, -9.342e-04, 1.355e-02, -4.168e-02, 3.468e-02, -1.581e-01, 8.516e-02, 7.710e-02) * s1_0_0;
	r3 += M4(-7.569e-03, -2.424e-03, 2.038e-02, -1.003e-02, -3.622e-03, 2.830e-02, -2.138e-02, 2.579e-02, -6.354e-03, -2.535e-02, 4.398e-03, 2.149e-02, 7.633e-02, -1.576e-02, -4.072e-03, -5.786e-02) * s1_0_0;
	r0 += M4(-7.098e-02, -7.617e-02, 4.146e-02, 3.107e-02, -5.315e-02, 6.382e-02, 1.415e-01, 1.835e-03, 4.785e-02, -1.997e-01, -2.693e-02, 4.235e-02, 2.083e-01, 1.860e-01, 2.423e-01, 1.885e-02) * s1_0_1;
	r1 += M4(-1.403e-02, 1.097e-02, 5.414e-02, 1.188e-01, 3.303e-03, 3.799e-02, 1.246e-01, 6.049e-02, -1.043e-02, 3.686e-02, -2.659e-02, 4.051e-02, -9.890e-02, 5.136e-02, 7.306e-02, 2.886e-02) * s1_0_1;
	r2 += M4(1.228e-02, 4.281e-02, -1.974e-01, -5.768e-02, 9.238e-02, 1.167e-01, -8.914e-02, -8.782e-02, -3.137e-02, -7.735e-02, -1.500e-01, 2.680e-02, -8.796e-03, 2.151e-01, -3.062e-01, -1.082e-01) * s1_0_1;
	r3 += M4(-7.182e-02, -6.130e-02, 1.030e-01, 1.243e-01, 9.864e-03, -1.417e-02, 3.309e-02, 7.630e-02, 2.341e-02, -2.382e-02, -2.156e-03, 1.293e-02, 2.954e-01, -3.796e-02, 1.504e-01, 9.937e-02) * s1_0_1;
	r0 += M4(-1.651e-02, -6.623e-02, -6.193e-02, 9.187e-03, 2.204e-02, 1.538e-01, 3.339e-02, 5.144e-02, -5.349e-02, -6.477e-02, -4.572e-02, -2.385e-02, 1.730e-01, 1.671e-03, -3.761e-02, -7.540e-02) * s1_0_2;
	r1 += M4(6.161e-02, 1.954e-02, 1.409e-02, -2.357e-02, 9.062e-02, -1.462e-02, 2.327e-02, 2.685e-02, -4.036e-02, 2.000e-02, -2.589e-02, -4.310e-03, 2.667e-02, -2.822e-02, -3.430e-02, -3.829e-02) * s1_0_2;
	r2 += M4(-1.218e-01, 2.560e-02, -3.748e-02, -8.357e-02, 8.620e-02, -1.802e-02, 1.060e-01, 3.187e-02, -2.818e-02, -3.686e-02, -5.419e-02, -1.931e-02, -8.447e-02, -1.650e-01, 9.880e-02, 2.417e-02) * s1_0_2;
	r3 += M4(-2.611e-03, -9.685e-02, 4.728e-02, 8.528e-02, 2.891e-02, -5.239e-02, -1.672e-02, 4.005e-02, 1.507e-02, -2.883e-03, -1.021e-02, -1.367e-03, 5.152e-02, 8.448e-02, -7.730e-02, -1.124e-01) * s1_0_2;
	r0 += M4(-2.986e-02, -4.845e-02, -4.569e-02, 7.264e-03, 1.428e-01, 9.279e-02, -7.521e-02, -3.771e-02, -9.806e-02, -4.850e-02, -7.003e-03, 3.749e-02, -2.700e-03, -3.290e-02, -1.671e-01, -6.276e-02) * s1_1_0;
	r1 += M4(7.446e-02, 7.717e-03, 4.373e-02, 4.296e-02, 5.467e-02, 2.875e-02, 8.211e-02, 9.841e-02, -8.439e-02, 7.281e-02, -7.448e-02, -1.255e-01, -1.234e-01, -1.537e-02, -1.323e-01, -3.428e-01) * s1_1_0;
	r2 += M4(2.738e-02, -5.826e-02, -8.601e-02, 4.655e-02, -5.351e-02, 3.340e-02, 1.872e-01, -6.544e-02, -3.245e-02, 2.921e-02, -1.236e-01, -3.357e-02, -2.358e-02, 1.095e-01, 1.585e-02, -8.979e-02) * s1_1_0;
	r3 += M4(-4.245e-02, -1.209e-02, -3.208e-03, 4.722e-02, 4.453e-02, 1.531e-01, -1.337e-02, -1.022e-01, 5.841e-03, -9.170e-02, -8.752e-03, -1.050e-01, -9.883e-03, 3.059e-02, 1.706e-02, -2.922e-01) * s1_1_0;
	r0 += M4(-1.132e-02, -4.199e-02, -1.621e-01, -6.076e-02, 3.165e-01, -2.706e-01, -1.183e-01, 3.114e-01, -2.495e-01, 1.753e-01, -3.384e-02, -1.109e-01, -1.692e-01, -2.314e-01, -2.603e-01, -8.505e-03) * s1_1_1;
	r1 += M4(4.798e-01, -6.040e-02, 4.622e-01, 5.169e-01, 1.615e-01, -9.801e-02, -3.213e-01, -2.173e-01, 1.620e-01, 2.825e-02, 2.109e-01, 1.645e-01, 2.026e-01, -2.260e-01, -7.418e-02, 1.325e-01) * s1_1_1;
	r2 += M4(-2.767e-01, -5.963e-01, -1.138e-02, 5.247e-01, 3.136e-01, -1.599e-01, 1.898e-01, 9.047e-02, 1.402e-01, 3.171e-01, 5.803e-02, -3.326e-01, -2.075e-01, 3.929e-01, -1.880e-01, -3.655e-01) * s1_1_1;
	r3 += M4(-4.401e-02, 5.500e-02, -8.929e-02, 5.497e-01, -4.153e-02, 4.345e-01, 1.651e-01, 1.314e-01, -9.528e-02, -3.447e-01, 1.382e-01, 3.227e-01, 4.043e-01, 6.160e-01, -2.084e-01, 2.967e-02) * s1_1_1;
	r0 += M4(1.680e-01, -4.475e-02, 4.237e-02, 4.031e-01, 1.610e-02, -1.769e-01, -8.174e-02, -7.153e-02, 8.334e-02, 9.655e-03, 1.350e-02, 5.131e-02, -5.238e-02, 6.824e-02, 2.726e-01, -1.265e-01) * s1_1_2;
	r1 += M4(2.612e-02, -4.360e-02, 1.339e-02, 6.131e-02, -5.421e-03, 4.786e-02, 9.345e-02, 5.952e-02, -1.409e-02, 1.370e-02, -7.607e-02, -3.796e-02, -1.557e-01, -1.790e-01, -2.051e-02, -3.571e-02) * s1_1_2;
	r2 += M4(1.608e-01, 6.209e-01, 7.109e-02, -6.328e-01, -2.057e-01, 1.252e-02, -1.834e-02, -3.075e-02, -1.479e-01, -4.834e-02, 6.341e-02, -6.311e-02, -2.947e-02, 1.422e-03, -4.978e-02, -1.660e-02) * s1_1_2;
	r3 += M4(1.643e-01, -2.504e-04, 4.212e-01, 2.135e-01, -5.158e-02, 1.722e-01, -1.934e-01, -8.618e-02, 4.685e-02, 3.571e-02, -3.023e-02, -9.096e-02, 1.549e-02, -5.524e-02, -7.691e-02, -2.646e-02) * s1_1_2;
	r0 += M4(-3.526e-02, -1.307e-02, 6.803e-03, 1.304e-02, 4.454e-02, 4.265e-02, 6.165e-02, 3.018e-02, -8.506e-03, -8.856e-02, -4.712e-02, -3.125e-03, -6.340e-03, -1.586e-03, 8.167e-03, 1.359e-02) * s1_2_0;
	r1 += M4(-2.124e-03, 2.813e-02, 1.127e-03, 1.094e-02, 6.217e-02, 2.865e-02, 9.523e-03, 5.716e-02, 5.332e-02, 6.368e-02, 1.253e-02, 7.520e-02, 5.533e-03, 5.325e-03, 1.218e-03, -1.184e-01) * s1_2_0;
	r2 += M4(-7.458e-03, 4.623e-02, -1.595e-02, -3.162e-02, -9.329e-04, -1.592e-02, 5.910e-02, 1.066e-02, -4.017e-03, 2.713e-02, 2.146e-04, -9.013e-02, 1.288e-02, -3.498e-02, 8.217e-04, -2.256e-02) * s1_2_0;
	r3 += M4(5.489e-03, -3.886e-03, 6.538e-03, 3.925e-02, 2.584e-02, -1.737e-01, -2.464e-02, 2.662e-02, 2.589e-03, 5.220e-02, 9.236e-03, -3.174e-02, -1.834e-02, -3.229e-02, 9.573e-03, 4.235e-02) * s1_2_0;
	r0 += M4(-1.002e-01, -6.164e-03, -1.239e-01, 3.247e-02, -4.380e-02, 1.919e-01, 2.710e-01, 1.463e-02, -8.190e-02, -2.221e-01, -8.428e-02, 7.246e-02, 1.035e-01, 8.188e-02, 2.666e-01, -8.719e-02) * s1_2_1;
	r1 += M4(2.106e-02, 1.891e-02, 2.648e-03, 5.834e-02, -2.202e-01, -1.059e-01, 2.597e-02, -1.236e-01, -1.581e-01, -1.172e-01, -2.455e-01, -1.426e-02, -8.918e-02, -1.431e-01, -6.235e-02, -1.023e-02) * s1_2_1;
	r2 += M4(3.452e-02, 6.941e-02, -2.875e-02, -5.652e-02, 1.781e-01, 4.051e-02, -5.593e-02, -1.295e-02, -1.568e-01, -1.911e-01, -7.827e-02, 6.708e-02, 7.082e-03, -1.291e-01, 5.971e-02, 1.163e-01) * s1_2_1;
	r3 += M4(-1.612e-01, -2.085e-01, 2.776e-02, 2.681e-02, -1.841e-02, -2.930e-01, 4.676e-04, 1.037e-02, -6.137e-02, -2.527e-02, -7.892e-02, 1.301e-01, -8.623e-02, -2.660e-01, -6.448e-02, -1.431e-01) * s1_2_1;
	r0 += M4(-6.817e-02, -8.082e-02, -2.373e-01, 3.159e-02, -8.324e-02, -8.328e-03, 7.070e-02, -2.686e-02, 9.337e-03, -1.127e-01, -6.083e-02, -1.244e-01, -4.480e-03, -2.622e-02, -3.517e-02, -2.591e-02) * s1_2_2;
	r1 += M4(5.053e-02, -7.455e-02, 3.140e-02, -3.099e-02, 4.314e-02, -6.421e-02, -3.822e-02, 3.410e-02, -1.582e-02, 7.183e-02, -2.832e-02, -5.506e-02, 1.868e-02, -3.134e-02, 2.108e-02, -1.065e-02) * s1_2_2;
	r2 += M4(-5.169e-02, -2.458e-02, -7.105e-02, 1.571e-03, -7.251e-02, 2.907e-02, -4.609e-02, -9.053e-03, -1.013e-01, -1.372e-01, -4.184e-02, 3.896e-02, 1.278e-02, 1.321e-02, -5.344e-03, -2.454e-02) * s1_2_2;
	r3 += M4(-7.791e-02, -2.282e-01, 2.214e-02, 1.258e-01, -4.909e-03, -7.124e-02, -2.235e-02, -1.947e-02, 2.560e-02, -5.179e-02, -6.660e-02, -1.264e-01, -2.541e-02, 4.245e-02, -1.329e-02, 5.098e-03) * s1_2_2;
	s0_0_0 = G[2][xy.y+0][xy.x+0]; s0_0_1 = G[2][xy.y+0][xy.x+1];
	s0_0_2 = G[2][xy.y+0][xy.x+2]; s0_1_0 = G[2][xy.y+1][xy.x+0];
	s0_1_1 = G[2][xy.y+1][xy.x+1]; s0_1_2 = G[2][xy.y+1][xy.x+2];
	s0_2_0 = G[2][xy.y+2][xy.x+0]; s0_2_1 = G[2][xy.y+2][xy.x+1];
	s0_2_2 = G[2][xy.y+2][xy.x+2]; s1_0_0 = G[3][xy.y+0][xy.x+0];
	s1_0_1 = G[3][xy.y+0][xy.x+1]; s1_0_2 = G[3][xy.y+0][xy.x+2];
	s1_1_0 = G[3][xy.y+1][xy.x+0]; s1_1_1 = G[3][xy.y+1][xy.x+1];
	s1_1_2 = G[3][xy.y+1][xy.x+2]; s1_2_0 = G[3][xy.y+2][xy.x+0];
	s1_2_1 = G[3][xy.y+2][xy.x+1]; s1_2_2 = G[3][xy.y+2][xy.x+2];
	r0 += M4(-2.567e-02, 7.006e-02, -1.182e-01, -1.846e-03, 2.357e-02, -9.985e-02, 2.245e-01, -3.986e-03, -2.911e-02, -5.612e-02, -4.028e-02, 5.200e-03, -7.405e-02, 1.740e-02, -3.124e-02, 3.520e-02) * s0_0_0;
	r1 += M4(-5.013e-02, -3.736e-03, -5.819e-02, 4.090e-02, 2.894e-02, 1.111e-02, 5.800e-02, -2.442e-02, 1.852e-02, -1.530e-02, -9.707e-02, -5.999e-02, -8.626e-03, 3.243e-02, -2.705e-02, 6.043e-02) * s0_0_0;
	r2 += M4(-7.213e-02, 3.249e-02, 7.629e-02, -1.490e-02, -5.338e-02, -5.812e-02, -1.704e-01, -5.029e-03, -7.001e-02, 5.520e-02, -7.836e-02, -7.010e-03, 1.534e-01, -7.073e-02, 7.044e-02, 1.750e-02) * s0_0_0;
	r3 += M4(-1.743e-02, 4.175e-02, 3.725e-02, -3.499e-03, 7.981e-02, -1.228e-02, -5.254e-02, 6.913e-04, -2.027e-02, -8.944e-03, -9.993e-04, -2.004e-02, 3.032e-02, -1.888e-02, -7.611e-03, 1.089e-01) * s0_0_0;
	r0 += M4(1.015e-01, -1.439e-01, -1.485e-01, -1.497e-02, -1.017e-01, 2.810e-01, 1.599e-01, 1.198e-02, -2.721e-02, 1.536e-02, -7.807e-03, -5.335e-02, -1.061e-01, -2.131e-01, -1.129e-01, 9.984e-04) * s0_0_1;
	r1 += M4(6.563e-02, 7.303e-02, 3.723e-02, 7.631e-02, -2.663e-02, -2.317e-02, -9.798e-02, -1.037e-01, -7.490e-02, -1.195e-01, -9.514e-03, -5.721e-02, 1.080e-02, -1.059e-01, -4.190e-02, 8.891e-03) * s0_0_1;
	r2 += M4(-1.972e-01, 6.457e-03, 1.524e-01, 2.053e-02, 1.032e-01, 7.786e-02, -1.761e-03, -1.053e-01, -7.678e-02, 6.428e-02, -4.793e-02, -5.045e-03, -5.030e-02, -1.214e-01, -1.587e-01, 1.165e-02) * s0_0_1;
	r3 += M4(3.216e-02, 1.564e-01, -1.620e-01, -4.708e-03, -6.409e-03, -1.596e-01, 1.852e-01, -1.083e-02, -4.764e-02, -4.392e-02, -1.122e-01, -5.902e-02, -1.893e-02, 4.362e-02, -1.999e-01, -4.687e-02) * s0_0_1;
	r0 += M4(1.373e-01, -1.584e-01, -1.505e-01, 6.238e-02, -9.977e-03, 4.213e-02, 8.363e-02, -2.691e-03, -3.299e-02, -7.354e-03, -7.549e-03, -4.479e-02, 2.173e-02, 1.683e-02, -4.659e-03, -8.781e-03) * s0_0_2;
	r1 += M4(-4.811e-02, 1.082e-01, -2.703e-02, -1.988e-02, 7.009e-02, -4.959e-03, 6.158e-02, 2.620e-03, 9.965e-05, -5.573e-02, -6.536e-03, -1.262e-02, 3.330e-03, -1.098e-02, -3.796e-04, 1.905e-02) * s0_0_2;
	r2 += M4(7.736e-02, -3.724e-03, 9.607e-02, 3.946e-02, -5.159e-02, 4.149e-02, -8.322e-02, -7.545e-03, -3.303e-02, 1.021e-02, -1.254e-02, 5.326e-04, -1.480e-02, 5.299e-03, 5.864e-02, -2.655e-02) * s0_0_2;
	r3 += M4(9.751e-02, 7.261e-02, 6.961e-02, 7.049e-02, -1.401e-02, -3.691e-03, -9.141e-04, -5.959e-02, -2.009e-02, 1.250e-02, -4.135e-02, -3.741e-02, -2.408e-02, 3.666e-02, -6.164e-02, 7.528e-02) * s0_0_2;
	r0 += M4(8.850e-02, 1.440e-01, 3.888e-02, 1.121e-01, -1.443e-01, -5.209e-02, -1.458e-01, -8.862e-02, -1.032e-01, 8.076e-03, -6.082e-02, -4.334e-02, 7.679e-04, -1.033e-01, 3.816e-02, 8.764e-02) * s0_1_0;
	r1 += M4(-1.345e-01, -6.985e-02, -1.783e-01, 1.072e-02, 2.200e-01, -3.357e-02, 9.370e-02, -2.000e-02, -2.278e-01, -1.562e-01, -1.509e-01, -1.272e-01, 3.884e-02, 8.118e-02, 1.382e-01, -6.854e-02) * s0_1_0;
	r2 += M4(-1.268e-02, -1.267e-01, 7.685e-03, 1.048e-01, -8.896e-03, 1.850e-01, -1.954e-01, -1.072e-01, -5.614e-02, 4.429e-02, -8.750e-02, 4.617e-02, 2.150e-01, -1.805e-01, -2.071e-02, 2.887e-02) * s0_1_0;
	r3 += M4(6.154e-02, -6.517e-02, -8.409e-03, 1.723e-01, -8.016e-02, 2.046e-01, 1.921e-02, 2.707e-02, -8.305e-02, -9.316e-02, -1.881e-02, -1.470e-01, 6.569e-03, -1.461e-01, 9.068e-02, 2.110e-01) * s0_1_0;
	r0 += M4(2.549e-01, 4.752e-01, 4.161e-01, -2.920e-01, -7.178e-02, -1.423e-01, -5.956e-01, 1.902e-01, -1.231e-01, 1.001e-02, -1.626e-01, -1.085e-01, -3.545e-01, -4.440e-01, -3.897e-01, -5.097e-01) * s0_1_1;
	r1 += M4(3.374e-01, 2.080e-01, 4.561e-01, 1.297e-01, -2.301e-01, -3.990e-01, 2.222e-01, -1.879e-01, -4.441e-02, -1.233e-01, -9.271e-02, -5.938e-02, -1.569e-01, -2.423e-01, -3.020e-01, 7.116e-02) * s0_1_1;
	r2 += M4(8.261e-02, -1.382e-01, 2.675e-02, 9.029e-02, -1.924e-01, -1.676e-01, -5.075e-02, -5.871e-03, 1.545e-02, 1.741e-01, -2.114e-02, -7.847e-02, -6.321e-02, -1.700e-01, -2.734e-01, 1.101e-01) * s0_1_1;
	r3 += M4(2.280e-01, -2.627e-01, -2.277e-02, 6.558e-02, -2.211e-01, 3.516e-01, -1.485e-01, -2.418e-01, -2.967e-02, 3.401e-02, -1.134e-01, 5.960e-03, -2.822e-01, -5.137e-01, -3.291e-01, -1.831e-01) * s0_1_1;
	r0 += M4(7.416e-02, 1.363e-01, 2.320e-01, 1.919e-01, 1.996e-02, 1.166e-01, -5.461e-02, 7.512e-02, -5.413e-02, -2.221e-02, -1.331e-01, -7.395e-02, -3.906e-02, -8.130e-02, -3.844e-02, -1.476e-01) * s0_1_2;
	r1 += M4(-9.849e-02, 4.604e-02, -9.217e-02, 5.695e-03, 4.799e-02, 9.356e-03, 9.028e-04, 3.982e-02, -1.806e-02, -1.200e-01, -1.812e-02, -1.001e-02, -4.542e-02, -1.394e-01, -1.714e-02, 5.130e-02) * s0_1_2;
	r2 += M4(1.007e-01, 2.290e-01, 7.537e-02, -1.406e-01, 6.311e-02, -1.223e-01, -2.697e-02, 1.537e-02, -2.485e-01, 1.340e-02, -4.624e-02, -2.253e-03, 1.896e-01, -3.699e-02, 2.223e-03, -4.380e-02) * s0_1_2;
	r3 += M4(6.910e-02, -1.371e-02, 1.738e-01, 3.628e-02, -4.404e-03, 7.369e-05, 2.036e-01, 3.200e-02, -3.351e-02, -2.268e-02, -8.617e-02, -5.813e-02, -5.456e-02, -1.015e-01, -1.218e-01, 9.093e-02) * s0_1_2;
	r0 += M4(5.801e-04, -6.927e-02, -7.765e-02, -5.728e-02, 4.148e-02, 1.094e-01, -4.755e-03, 5.748e-02, 1.213e-02, 1.898e-02, 7.565e-02, 3.676e-03, 2.917e-02, 1.793e-02, -1.427e-02, 1.566e-02) * s0_2_0;
	r1 += M4(-1.013e-01, 1.929e-02, -4.123e-03, -1.042e-01, -8.732e-03, -1.077e-01, 9.509e-02, 2.643e-02, -6.125e-02, -3.869e-02, -3.237e-02, -5.351e-02, 1.018e-01, 6.570e-02, 2.170e-02, 8.048e-02) * s0_2_0;
	r2 += M4(1.271e-02, 1.689e-02, 3.575e-02, 2.234e-02, -9.954e-02, -9.164e-03, -1.231e-01, 5.513e-02, -3.468e-02, 7.852e-02, -2.368e-02, 2.218e-02, 6.504e-02, 1.922e-03, 4.809e-02, -6.699e-02) * s0_2_0;
	r3 += M4(6.552e-02, 7.481e-02, 1.628e-02, -4.590e-03, 4.397e-02, -6.097e-02, 4.161e-02, 4.168e-02, -2.783e-02, 6.207e-03, 1.126e-02, -4.266e-02, 7.103e-02, 1.778e-01, -1.090e-02, 6.583e-02) * s0_2_0;
	r0 += M4(1.757e-02, -6.156e-02, -1.568e-01, 5.182e-02, -1.410e-01, -5.486e-02, -1.811e-01, -5.604e-02, 3.310e-02, -4.463e-03, -1.464e-02, -4.598e-02, 4.442e-02, 1.061e-01, 2.467e-01, 2.644e-02) * s0_2_1;
	r1 += M4(2.574e-02, 9.541e-02, 1.465e-02, 3.342e-02, 1.630e-01, 9.931e-02, -2.573e-02, 1.251e-01, -4.766e-02, -6.214e-02, -5.208e-02, -4.513e-02, -8.815e-02, -1.461e-02, 7.550e-02, -4.088e-02) * s0_2_1;
	r2 += M4(1.676e-01, 2.033e-01, 7.982e-02, -1.401e-01, -1.091e-01, -3.821e-04, -3.628e-02, -7.215e-02, -4.603e-02, 5.750e-02, -2.334e-02, 3.228e-02, 2.303e-01, -7.449e-02, 6.016e-02, -2.131e-02) * s0_2_1;
	r3 += M4(-1.471e-01, 1.200e-02, 2.569e-02, -4.554e-02, 1.061e-01, 2.282e-01, 7.577e-02, -6.547e-02, 1.235e-02, -9.110e-03, -5.338e-02, -2.415e-02, 9.323e-02, 2.270e-01, 2.239e-02, 7.858e-03) * s0_2_1;
	r0 += M4(5.541e-02, 1.254e-03, -9.881e-02, -5.158e-02, 6.367e-02, -8.109e-03, -2.331e-01, 1.579e-02, 2.684e-02, 1.054e-02, -2.327e-02, -4.142e-02, 1.811e-02, 2.445e-02, 8.175e-02, 2.308e-02) * s0_2_2;
	r1 += M4(-1.418e-02, -2.913e-02, -2.028e-02, 5.218e-03, -7.034e-02, -1.488e-01, -2.469e-02, -4.836e-02, -2.947e-02, -9.443e-02, -2.080e-02, -3.689e-02, 2.049e-02, 3.408e-02, 9.912e-03, 1.706e-02) * s0_2_2;
	r2 += M4(2.652e-02, -8.708e-02, 2.093e-02, 7.470e-02, -1.609e-01, -5.981e-02, 5.421e-02, -1.060e-02, -1.102e-01, -1.987e-03, -3.853e-02, 2.716e-02, 1.743e-02, -2.377e-02, -2.476e-02, -1.528e-03) * s0_2_2;
	r3 += M4(1.616e-02, 1.018e-03, -1.194e-02, 1.137e-02, -4.329e-02, 2.408e-02, -5.464e-02, -1.163e-02, -9.313e-03, -7.875e-02, -4.236e-02, -7.034e-02, 5.199e-02, 5.957e-02, 2.505e-02, 2.683e-03) * s0_2_2;
	r0 += M4(-1.567e-02, -1.792e-01, -8.752e-02, 9.240e-03, -4.321e-02, 5.171e-02, -2.181e-03, -2.038e-03, -3.784e-02, 8.860e-02, 1.781e-01, 4.676e-02, 1.279e-03, -2.356e-02, 6.099e-02, 1.797e-02) * s1_0_0;
	r1 += M4(7.100e-02, 9.202e-02, 1.274e-01, 3.706e-02, -9.161e-03, -7.768e-03, -6.041e-02, -1.072e-01, 1.968e-02, 1.074e-01, 1.499e-02, -4.620e-04, -9.928e-03, -1.250e-02, -3.061e-02, -7.690e-02) * s1_0_0;
	r2 += M4(-1.646e-02, 5.484e-02, -6.624e-02, -6.100e-02, 3.602e-02, -1.390e-02, 1.100e-02, -1.308e-02, 3.752e-02, 6.949e-02, 1.133e-02, -4.377e-02, -4.935e-02, -3.897e-02, -4.388e-02, 1.681e-02) * s1_0_0;
	r3 += M4(-1.358e-02, -5.391e-02, 1.714e-03, 3.306e-03, 3.061e-02, 3.321e-02, 4.817e-03, -6.786e-02, -1.662e-03, -7.495e-02, 1.360e-03, -1.568e-02, 3.126e-03, 9.595e-03, 1.248e-02, -3.701e-02) * s1_0_0;
	r0 += M4(-5.270e-02, -1.300e-01, 2.569e-02, 4.587e-02, -1.637e-02, 1.027e-01, 3.415e-02, -2.203e-02, -8.375e-02, -1.109e-01, 1.100e-01, 7.450e-03, 3.428e-02, 1.448e-01, 1.810e-02, -4.322e-02) * s1_0_1;
	r1 += M4(3.224e-04, 8.301e-02, 5.827e-02, 7.759e-02, 1.753e-03, -1.470e-02, 1.466e-02, -6.600e-03, 4.685e-03, 8.880e-02, 9.605e-03, 5.645e-02, -4.808e-02, -6.848e-02, -4.176e-02, -3.018e-02) * s1_0_1;
	r2 += M4(-4.719e-02, -8.675e-03, -1.615e-01, -1.502e-02, 2.986e-02, 1.151e-02, 6.370e-02, -3.240e-02, 1.120e-01, 5.379e-02, -1.964e-01, -3.497e-02, 2.387e-02, 1.067e-02, 6.635e-02, -1.209e-02) * s1_0_1;
	r3 += M4(-3.710e-03, -9.953e-02, 1.265e-01, 9.838e-02, -1.016e-03, 6.388e-02, -9.595e-02, -4.360e-02, 4.985e-02, -1.242e-01, 5.559e-02, 6.099e-02, 1.542e-02, 1.208e-02, -3.626e-02, -8.076e-02) * s1_0_1;
	r0 += M4(-5.455e-03, 2.433e-02, 2.434e-02, 1.067e-02, -3.303e-02, 1.812e-04, 7.643e-02, 6.909e-03, 5.828e-02, 1.229e-02, 2.218e-02, -4.186e-03, 2.060e-02, 1.666e-04, -2.201e-03, -2.210e-02) * s1_0_2;
	r1 += M4(2.215e-02, 2.908e-02, 1.521e-02, -1.373e-04, -7.432e-03, 7.417e-03, 9.228e-04, 1.999e-02, 3.772e-03, -1.503e-02, -4.363e-02, -4.225e-02, 2.203e-02, -3.803e-02, -2.122e-03, -9.408e-03) * s1_0_2;
	r2 += M4(-3.567e-02, -2.873e-02, 4.415e-02, -3.816e-03, 7.243e-02, 2.335e-02, -7.529e-03, -1.226e-02, 4.399e-03, -4.504e-02, -7.115e-03, 6.046e-02, -4.603e-02, -2.359e-02, -8.246e-03, 1.418e-02) * s1_0_2;
	r3 += M4(-2.274e-03, -8.816e-03, 2.782e-02, 8.584e-04, 3.669e-03, 1.769e-02, 2.775e-02, 3.830e-02, 2.229e-02, 2.345e-02, -5.149e-02, -1.435e-02, -1.710e-02, 1.486e-02, -5.193e-02, -1.138e-02) * s1_0_2;
	r0 += M4(1.333e-01, 7.711e-02, 2.855e-02, -2.102e-02, -1.135e-01, -1.376e-01, -1.792e-01, -4.866e-02, 3.894e-02, -2.610e-01, 2.973e-02, -5.849e-02, 1.070e-01, 6.541e-02, -2.812e-01, 1.533e-02) * s1_1_0;
	r1 += M4(2.685e-01, 2.082e-01, 2.307e-01, 2.155e-02, -3.345e-01, -1.650e-01, -4.391e-01, -1.170e-01, -9.929e-02, -6.237e-02, -6.323e-02, 5.084e-03, -2.740e-02, -5.341e-02, -9.806e-02, -2.000e-01) * s1_1_0;
	r2 += M4(-2.712e-02, 3.467e-01, 1.293e-01, -3.338e-01, 2.746e-02, -2.917e-02, -1.255e-01, -8.939e-02, 5.841e-02, -8.908e-02, -6.546e-03, 2.760e-02, -8.943e-02, 6.175e-03, 1.450e-01, -2.752e-02) * s1_1_0;
	r3 += M4(1.119e-01, -1.334e-02, -3.177e-02, 1.209e-01, -9.633e-02, -1.097e-01, -1.096e-01, -3.037e-01, -1.222e-02, 1.528e-01, 1.607e-02, -3.010e-02, 5.084e-02, 7.188e-02, -1.717e-02, -1.588e-01) * s1_1_0;
	r0 += M4(-3.545e-01, -1.294e-01, -2.646e-01, 5.283e-01, -1.957e-01, -1.382e-01, -2.325e-01, -2.668e-01, 4.014e-01, -8.780e-02, 7.653e-02, 9.189e-04, -2.585e-01, 3.265e-01, -1.666e-01, -1.548e-01) * s1_1_1;
	r1 += M4(-4.431e-02, 5.410e-01, -1.134e-01, -1.701e-01, -3.209e-01, -4.922e-02, -3.213e-01, 5.517e-03, 3.584e-01, 1.557e-01, 3.545e-01, 1.652e-01, -1.898e-01, -2.990e-01, -1.500e-01, 8.588e-03) * s1_1_1;
	r2 += M4(1.605e-01, 8.887e-02, -2.174e-01, -4.766e-02, -6.553e-01, -3.703e-01, -8.887e-02, 1.665e-01, -1.992e-01, -1.338e-01, 3.551e-01, -1.102e-02, -3.681e-02, 6.381e-02, 2.580e-01, 1.801e-03) * s1_1_1;
	r3 += M4(-1.141e-01, -6.717e-02, 4.932e-01, -1.364e-01, -1.218e-01, -1.207e-01, -1.629e-01, -3.853e-01, -2.195e-02, 4.824e-01, -1.993e-01, 1.274e-02, -7.611e-02, 1.198e-01, -1.353e-01, -1.753e-01) * s1_1_1;
	r0 += M4(-1.074e-01, -8.516e-02, -6.479e-02, -1.044e-01, -3.066e-02, -1.229e-02, -2.101e-01, 3.411e-02, 1.454e-01, 1.126e-02, 1.657e-01, 1.251e-01, -7.867e-03, -6.981e-02, 1.449e-02, -1.323e-01) * s1_1_2;
	r1 += M4(3.654e-02, -6.548e-02, 4.236e-02, -1.198e-02, 4.596e-02, 8.618e-02, 4.568e-02, 8.263e-03, -1.401e-01, 1.586e-02, -9.538e-02, 1.258e-03, -3.859e-02, -1.565e-01, -6.938e-02, -4.233e-02) * s1_1_2;
	r2 += M4(-1.606e-01, -6.915e-02, -1.362e-01, 9.363e-02, 2.831e-02, -5.961e-02, -9.482e-02, 1.971e-02, 2.070e-01, 1.085e-01, 1.954e-01, -1.479e-01, -9.986e-02, -6.243e-03, 1.376e-02, 8.061e-03) * s1_1_2;
	r3 += M4(-7.732e-02, -9.413e-04, -9.362e-02, 3.511e-02, -4.981e-02, -1.171e-02, 4.286e-02, 3.436e-02, 8.965e-02, 9.017e-02, 1.889e-01, 3.739e-02, -8.380e-03, 4.965e-02, -1.661e-01, -5.285e-02) * s1_1_2;
	r0 += M4(-6.909e-02, -1.043e-01, -3.359e-04, 5.314e-03, -1.483e-01, -7.394e-03, -8.210e-02, -3.699e-02, -6.758e-02, 4.920e-03, -8.253e-03, -1.483e-02, 1.370e-01, 1.054e-01, 9.496e-02, -6.570e-02) * s1_2_0;
	r1 += M4(5.603e-02, 8.516e-02, 5.506e-02, -2.977e-02, -3.575e-01, -1.360e-01, -6.701e-02, -1.995e-01, 2.433e-02, -4.418e-02, -6.319e-02, -5.778e-02, -5.404e-02, -1.143e-01, -8.965e-02, -2.405e-02) * s1_2_0;
	r2 += M4(-5.169e-02, 4.610e-02, -5.227e-02, -1.987e-02, 9.583e-02, -5.697e-02, -1.189e-01, -3.512e-02, -1.205e-02, -2.761e-02, -5.323e-02, 5.211e-03, -1.104e-02, -3.186e-02, 2.152e-01, 3.303e-02) * s1_2_0;
	r3 += M4(-8.933e-02, -1.760e-01, 1.400e-02, 1.462e-02, -1.942e-01, -3.096e-01, 5.104e-03, -2.011e-01, 2.541e-02, 7.248e-02, -2.107e-02, -6.857e-02, 5.575e-02, 5.089e-02, -4.830e-02, -3.553e-02) * s1_2_0;
	r0 += M4(-5.584e-02, -7.291e-02, -1.373e-01, 5.640e-02, -8.675e-02, -4.893e-02, 3.497e-02, -3.730e-01, 3.939e-02, 4.862e-02, 7.018e-02, -1.142e-01, 3.734e-01, 4.318e-02, 2.095e-01, 6.666e-02) * s1_2_1;
	r1 += M4(6.115e-02, 5.615e-02, 2.819e-02, 2.307e-02, -8.381e-02, -7.659e-02, -2.282e-02, -5.658e-03, -2.112e-02, 2.429e-02, 1.225e-02, 5.191e-02, 1.899e-01, 9.200e-02, 1.121e-01, -1.458e-02) * s1_2_1;
	r2 += M4(1.509e-02, 6.455e-02, -3.774e-02, -8.044e-02, 1.883e-01, -2.150e-01, -6.765e-02, 4.388e-02, -1.387e-01, 5.360e-02, -1.172e-03, -2.261e-02, 2.265e-02, 3.431e-01, 3.810e-01, -1.851e-01) * s1_2_1;
	r3 += M4(-1.597e-01, -1.694e-01, 4.392e-02, -1.353e-02, -4.768e-02, -1.768e-01, -1.743e-01, -1.091e-01, 1.798e-03, -3.925e-02, -3.455e-02, -2.567e-02, -2.423e-01, 1.024e-01, -6.664e-03, 3.696e-02) * s1_2_1;
	r0 += M4(-1.277e-02, 5.941e-03, -5.918e-02, 1.168e-02, -4.565e-02, 1.083e-02, -4.378e-03, 4.823e-02, 3.530e-02, 4.863e-03, 5.151e-02, 6.034e-02, 1.353e-01, 1.626e-01, 8.336e-02, -1.566e-02) * s1_2_2;
	r1 += M4(-2.948e-03, -4.209e-02, -1.523e-02, 1.829e-02, 1.072e-02, 2.205e-02, 1.173e-02, 2.265e-02, 4.127e-03, 7.668e-02, 1.176e-02, -4.401e-02, -1.352e-01, -9.593e-02, -4.400e-02, -1.763e-02) * s1_2_2;
	r2 += M4(2.243e-02, 6.267e-03, 3.188e-03, -2.457e-02, 9.468e-02, 2.250e-02, -5.066e-02, -4.863e-02, -5.603e-02, 3.459e-03, 3.768e-02, -3.933e-03, -2.246e-02, 4.544e-02, 1.279e-01, 2.322e-02) * s1_2_2;
	r3 += M4(-2.148e-02, -4.819e-02, -2.312e-02, -5.948e-03, -1.505e-02, 1.711e-02, 9.489e-03, 1.754e-02, 2.546e-02, -1.646e-01, 4.235e-02, 1.171e-01, 6.437e-02, 4.002e-02, -5.847e-03, -9.788e-02) * s1_2_2;
	r0 += V4(-8.721e-03, -1.653e-02, -1.863e-02, -2.118e-02);
	r0 = clamp(r0, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 += V4(-1.713e-02, -2.685e-02, -1.923e-02, -1.789e-02);
	r1 = clamp(r1, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 += V4(-1.822e-02, -1.558e-02, -1.133e-02, -2.768e-02);
	r2 = clamp(r2, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 1), vec4(r2));
	r3 += V4(-1.432e-02, -1.945e-02, -2.069e-02, -2.167e-02);
	r3 = clamp(r3, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 1), vec4(r3));
}

//!DESC [CuNNy_4x16_DS] -out-shuffle
//!HOOK LUMA
//!COMPUTE 16 16 8 8
//!BIND conv4
//!BIND LUMA
//!WIDTH LUMA.w 2 *
//!HEIGHT LUMA.h 2 *
//!COMPONENTS 1
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Precision selection for this pass: when the explicit float16 arithmetic
// extension is available, vectors/matrices/scalars use 16-bit float types;
// otherwise the macros fall back to the regular 32-bit types.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// Workgroup-shared input tile: 4 planes of 10x10 V4 values. hook() fills it
// once per workgroup and then reads 3x3 neighbourhoods out of it.
shared V4 G[4][10][10];
// Final pass: applies a 3x3 stencil of 4x4 weight matrices to the four planes
// gathered from `conv4`, producing one V4 per invocation, then writes its four
// components (each added to a LUMA sample) to a 2x2 block of the output image.
void hook() {
	// xy: invocation index inside the workgroup; pos: input-resolution pixel
	// handled by this invocation; opos: top-left of its 2x2 output block.
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	ivec2 opos = pos * ivec2(2, 2);
	// Largest valid LUMA coordinate, used below to clamp border reads.
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	// Cooperative fill of the 10x10 shared tile: each invocation writes its own
	// cell and, via the +8 offsets, the extra border cells with index < 10.
	// Cell [ay][ax] holds data for position (workgroup origin + (ax-1, ay-1)).
	// NOTE(review): the 0/8 offsets assume an 8x8 thread group, matching the
	// thread counts in this pass's COMPUTE directive — confirm.
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			vec2 p;
			// Clamped source position scaled by 2 and offset by 1, then
			// multiplied by conv4_pt to form the gather coordinate.
			// NOTE(review): presumably this is the shared corner of the 2x2
			// conv4 texel group belonging to that position — confirm.
			p = vec2(clamp(pos + ivec2(x - 1, y - 1), ivec2(0), sz) * ivec2(2, 2) + ivec2(1, 1)) * conv4_pt;
			// One gather per channel (0..3) of conv4.
			V4 sr0 = V4(conv4_gather(p, 0));
			V4 sg0 = V4(conv4_gather(p, 1));
			V4 sb0 = V4(conv4_gather(p, 2));
			V4 sa0 = V4(conv4_gather(p, 3));
			// Regroup so each plane holds all four channels of one gathered texel.
			// NOTE(review): if conv4_gather follows GLSL textureGather ordering,
			// .w/.z/.x/.y are the top-left/top-right/bottom-left/bottom-right
			// texels respectively — confirm.
			G[0][ay][ax] = V4(sr0.w, sg0.w, sb0.w, sa0.w);
			G[1][ay][ax] = V4(sr0.z, sg0.z, sb0.z, sa0.z);
			G[2][ay][ax] = V4(sr0.x, sg0.x, sb0.x, sa0.x);
			G[3][ay][ax] = V4(sr0.y, sg0.y, sb0.y, sa0.y);
		}
	}
	// All tile writes must complete before any invocation reads neighbours.
	barrier();
	// sP_R_C: value at row R, column C of the 3x3 window; P selects the first
	// or second plane of the pair currently loaded. s*_1_1 is this invocation's
	// own position.
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	// Accumulator for the four output values.
	V4 r0;
	r0 = V4(0.0);
	// Load the 3x3 windows of planes G[0] (s0_*) and G[1] (s1_*).
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	// Weighted accumulation for planes 0 and 1: one 4x4 matrix per window tap.
	r0 += M4(-5.995e-03, 4.945e-03, 1.067e-03, 1.000e-03, 1.214e-02, 5.690e-04, 1.167e-03, 1.437e-03, 1.579e-02, 1.253e-02, -1.648e-03, 2.777e-04, 1.670e-02, -1.328e-02, 6.442e-03, -3.961e-03) * s0_0_0;
	r0 += M4(-8.513e-02, -7.450e-02, 4.316e-03, -1.147e-03, 3.058e-02, 2.366e-02, -7.475e-03, -4.622e-03, -1.192e-02, 7.398e-03, -5.571e-03, -5.490e-03, -1.430e-02, -2.143e-02, 5.814e-03, 1.732e-02) * s0_0_1;
	r0 += M4(8.188e-03, -1.974e-02, -1.421e-03, 2.216e-03, 7.702e-03, 2.437e-02, 3.103e-03, -3.055e-03, 2.958e-03, -2.427e-03, -1.088e-03, -1.527e-03, 1.192e-03, 9.478e-04, 2.037e-04, 2.855e-03) * s0_0_2;
	r0 += M4(5.455e-02, -6.095e-03, 2.648e-02, 1.887e-03, -1.129e-03, 6.554e-03, 2.223e-02, -3.715e-03, -1.352e-01, 7.001e-03, 5.332e-02, 8.001e-03, 1.138e-02, -2.657e-02, 1.098e-01, 2.640e-02) * s0_1_0;
	r0 += M4(1.223e-01, 1.675e-01, 9.407e-02, 3.818e-02, -3.041e-01, -1.655e-01, 1.255e-01, 1.023e-01, -5.969e-02, -2.588e-01, 1.158e-01, 1.528e-01, -2.329e-02, 9.395e-02, 1.792e-01, -5.532e-01) * s0_1_1;
	r0 += M4(9.873e-04, 2.327e-02, -1.180e-02, 5.969e-02, 7.430e-04, -1.284e-01, -1.632e-02, 2.920e-02, -6.442e-04, 5.948e-02, 7.548e-03, 3.617e-02, -3.080e-04, -4.607e-03, -5.603e-03, 7.056e-02) * s0_1_2;
	r0 += M4(-6.654e-03, -1.173e-05, -2.432e-02, 1.389e-03, -4.595e-03, -1.734e-03, -1.510e-03, 2.310e-03, 1.132e-02, 2.819e-03, 1.310e-02, 5.400e-03, 3.838e-03, 8.973e-04, 7.009e-03, -2.104e-03) * s0_2_0;
	r0 += M4(-8.597e-03, -1.427e-02, -7.879e-02, -9.497e-02, -1.182e-03, 7.331e-04, 9.392e-02, 3.236e-02, 1.500e-02, 1.899e-02, -8.082e-02, -2.123e-02, -3.992e-03, -3.513e-03, 2.524e-02, 4.161e-02) * s0_2_1;
	r0 += M4(5.866e-04, 2.202e-03, 8.718e-04, -1.882e-02, 2.026e-05, -4.847e-03, -4.457e-03, 4.836e-02, 2.789e-03, 5.601e-03, 3.429e-04, -4.186e-02, 3.049e-05, -5.566e-04, -9.132e-04, 5.467e-03) * s0_2_2;
	r0 += M4(8.802e-03, -7.795e-04, 1.688e-03, -5.253e-03, 1.208e-02, 2.589e-03, 2.892e-04, 1.559e-03, -1.315e-03, -3.969e-03, -4.646e-03, -4.147e-04, 2.012e-03, -1.112e-03, 2.075e-03, 1.805e-04) * s1_0_0;
	r0 += M4(-5.655e-02, 1.327e-02, 9.886e-03, 1.593e-02, -7.974e-03, 6.795e-03, 1.393e-02, 1.951e-03, 1.178e-02, 1.128e-02, -1.090e-02, -4.452e-03, -4.506e-02, 7.565e-03, 6.446e-03, 6.311e-03) * s1_0_1;
	r0 += M4(2.525e-04, -2.229e-03, -1.282e-03, -3.064e-03, 6.856e-04, -9.450e-03, -1.911e-03, 7.053e-04, -2.222e-03, -7.426e-03, 2.544e-03, -1.837e-03, -2.013e-03, 1.753e-02, -3.794e-03, 8.085e-03) * s1_0_2;
	r0 += M4(-1.404e-02, -9.317e-04, 5.482e-02, -1.041e-02, 8.166e-02, 3.676e-03, 7.495e-02, 3.798e-03, 3.138e-02, -7.983e-03, -1.355e-02, 2.598e-04, -3.683e-04, 3.317e-04, -3.180e-03, -8.964e-04) * s1_1_0;
	r0 += M4(1.252e-01, 3.328e-02, -4.104e-01, 2.460e-01, -1.677e-01, 1.284e-01, -1.763e-01, 1.239e-01, -4.541e-01, 1.997e-01, 3.728e-02, 4.114e-03, -2.852e-01, -4.191e-02, -2.798e-01, -3.993e-02) * s1_1_1;
	r0 += M4(-1.600e-02, -1.270e-03, 5.646e-04, 1.125e-01, 7.740e-04, -6.665e-02, 7.803e-03, -7.215e-02, 7.810e-03, 1.036e-01, -1.893e-02, -1.527e-02, 1.568e-02, 8.520e-02, 2.811e-02, 9.585e-02) * s1_1_2;
	r0 += M4(-1.199e-03, -2.526e-04, 3.804e-03, -1.325e-03, -3.196e-03, 1.733e-03, 1.568e-02, 2.864e-03, 3.747e-03, -2.459e-03, -9.719e-03, -9.395e-05, 8.920e-04, -8.818e-04, 3.437e-03, -1.316e-03) * s1_2_0;
	r0 += M4(-3.646e-03, -7.665e-04, 4.480e-02, 3.372e-02, 8.046e-03, -4.775e-03, 5.261e-03, 1.890e-03, 6.027e-03, 1.823e-02, -7.143e-02, -1.739e-03, 7.385e-05, 4.309e-03, -5.982e-02, 1.023e-02) * s1_2_1;
	r0 += M4(3.822e-03, 1.573e-03, 4.677e-03, -1.596e-03, 1.322e-04, -1.563e-03, -2.221e-03, -3.253e-03, -2.441e-03, -1.186e-02, 2.360e-03, -1.242e-02, -2.677e-03, 9.064e-03, -1.109e-02, 8.459e-03) * s1_2_2;
	// Reuse the same window variables for planes G[2] (s0_*) and G[3] (s1_*).
	s0_0_0 = G[2][xy.y+0][xy.x+0]; s0_0_1 = G[2][xy.y+0][xy.x+1];
	s0_0_2 = G[2][xy.y+0][xy.x+2]; s0_1_0 = G[2][xy.y+1][xy.x+0];
	s0_1_1 = G[2][xy.y+1][xy.x+1]; s0_1_2 = G[2][xy.y+1][xy.x+2];
	s0_2_0 = G[2][xy.y+2][xy.x+0]; s0_2_1 = G[2][xy.y+2][xy.x+1];
	s0_2_2 = G[2][xy.y+2][xy.x+2]; s1_0_0 = G[3][xy.y+0][xy.x+0];
	s1_0_1 = G[3][xy.y+0][xy.x+1]; s1_0_2 = G[3][xy.y+0][xy.x+2];
	s1_1_0 = G[3][xy.y+1][xy.x+0]; s1_1_1 = G[3][xy.y+1][xy.x+1];
	s1_1_2 = G[3][xy.y+1][xy.x+2]; s1_2_0 = G[3][xy.y+2][xy.x+0];
	s1_2_1 = G[3][xy.y+2][xy.x+1]; s1_2_2 = G[3][xy.y+2][xy.x+2];
	// Weighted accumulation for planes 2 and 3.
	r0 += M4(-2.187e-02, -5.752e-03, -3.526e-04, 3.286e-03, -1.035e-02, -2.966e-04, -1.070e-04, -9.189e-07, -8.651e-03, 7.416e-04, -1.373e-03, -1.131e-03, 1.124e-02, 1.398e-06, 5.600e-07, 9.953e-07) * s0_0_0;
	r0 += M4(-1.540e-02, -2.724e-02, 4.078e-03, 2.466e-03, -2.911e-02, -2.924e-02, 3.959e-06, 3.905e-06, -4.422e-02, -3.100e-02, 8.653e-03, 3.745e-03, 3.357e-02, 3.333e-02, -5.626e-08, 3.352e-07) * s0_0_1;
	r0 += M4(2.286e-04, -2.615e-03, 5.489e-04, 2.567e-03, -1.365e-04, -1.059e-02, -9.060e-07, -6.379e-04, -4.281e-03, -2.496e-02, -2.958e-03, 4.439e-03, 1.667e-06, 1.224e-02, 1.057e-06, 2.774e-06) * s0_0_2;
	r0 += M4(-4.736e-02, -2.802e-02, -6.003e-02, -2.759e-02, -2.899e-02, 6.474e-07, -2.889e-02, -4.668e-07, -1.340e-02, -7.627e-03, -2.521e-02, 3.139e-03, 3.235e-02, -2.336e-06, 3.210e-02, -9.201e-07) * s0_1_0;
	r0 += M4(-1.132e-02, -1.853e-01, -4.847e-02, -9.058e-02, -8.225e-02, -8.227e-02, -8.228e-02, -8.276e-02, 9.184e-02, 4.058e-02, -1.436e-01, -1.173e-01, 9.498e-02, 9.512e-02, 9.463e-02, 9.477e-02) * s0_1_1;
	r0 += M4(7.670e-04, -1.755e-02, 1.643e-03, -1.739e-02, 7.014e-07, -2.952e-02, 2.536e-05, -2.936e-02, -9.995e-03, 2.752e-02, 7.793e-03, -4.232e-02, 5.156e-06, 3.308e-02, 5.141e-07, 3.321e-02) * s0_1_2;
	r0 += M4(-1.318e-02, -2.586e-04, -2.864e-02, -2.083e-02, -7.256e-05, -3.192e-06, -1.004e-02, -3.678e-04, 3.599e-03, -1.518e-03, 2.399e-02, -1.000e-02, 5.211e-07, -6.098e-07, 1.112e-02, 3.412e-07) * s0_2_0;
	r0 += M4(-1.194e-03, -1.357e-02, -2.997e-02, -2.485e-02, 8.260e-05, 4.389e-05, -2.863e-02, -2.820e-02, 8.932e-03, 1.333e-02, 1.401e-01, 1.401e-01, 4.806e-07, 4.716e-06, 3.406e-02, 3.428e-02) * s0_2_1;
	r0 += M4(3.853e-05, 1.574e-03, -2.097e-05, -1.693e-02, -6.357e-07, -8.190e-05, -8.311e-05, -1.071e-02, -1.577e-03, -1.646e-03, -3.394e-03, 3.699e-02, -4.295e-07, 9.416e-05, 1.295e-06, 1.156e-02) * s0_2_2;
	r0 += M4(1.436e-02, -3.681e-03, -1.747e-04, -2.611e-03, 7.324e-03, -7.632e-03, -1.737e-03, -1.743e-04, 3.010e-02, 5.378e-03, 4.855e-03, 1.515e-03, -7.348e-03, 5.262e-03, 3.246e-03, 1.487e-03) * s1_0_0;
	r0 += M4(1.579e-01, 1.210e-01, 4.900e-03, 6.993e-03, 9.487e-02, 7.983e-02, -2.281e-03, 1.721e-03, 3.308e-02, 4.089e-02, -7.328e-03, -2.611e-03, 4.702e-02, -5.968e-03, 1.401e-02, 5.173e-03) * s1_0_1;
	r0 += M4(-6.480e-03, 4.306e-02, 2.046e-03, 3.048e-03, 5.928e-04, 3.496e-02, 6.720e-04, -3.550e-03, -1.417e-03, 1.070e-02, 2.358e-04, 4.609e-05, 5.396e-04, 9.596e-03, -1.056e-03, -4.326e-03) * s1_0_2;
	r0 += M4(-1.899e-02, 2.230e-04, 1.178e-03, -1.764e-02, 2.040e-02, -3.702e-03, -5.188e-02, 2.287e-02, 1.567e-01, 5.945e-02, 7.475e-02, 1.866e-03, -3.744e-02, 1.047e-02, -4.762e-02, 1.109e-02) * s1_1_0;
	r0 += M4(-1.680e-01, -1.323e-01, 1.121e-01, 1.035e-01, 9.350e-02, 8.716e-02, -3.037e-01, -2.866e-01, 1.606e-01, -4.899e-01, -8.259e-03, 1.264e-01, 1.798e-01, -2.036e-01, 1.919e-01, -1.957e-01) * s1_1_1;
	r0 += M4(1.169e-02, -4.984e-02, -2.178e-02, -5.446e-03, -7.014e-03, 2.597e-02, 8.576e-03, -7.251e-02, -7.627e-04, 6.910e-02, -3.691e-04, -3.502e-04, -8.899e-04, 7.573e-02, 1.307e-03, 7.642e-02) * s1_1_2;
	r0 += M4(-2.761e-03, -1.731e-03, -1.272e-02, -2.617e-03, 2.759e-03, 2.202e-03, 1.779e-02, 8.336e-03, 2.510e-02, -5.055e-04, 4.140e-02, 5.240e-03, 1.463e-03, 1.858e-03, 1.219e-03, 5.176e-03) * s1_2_0;
	r0 += M4(6.036e-03, -6.748e-04, -6.274e-02, -4.721e-02, -5.174e-03, -1.696e-03, 2.422e-02, 2.032e-02, 4.474e-03, 4.041e-02, -2.256e-02, -3.415e-02, 9.583e-03, 3.242e-03, 3.723e-02, -2.373e-02) * s1_2_1;
	r0 += M4(-3.321e-03, 4.355e-03, 2.255e-03, -2.645e-02, 2.431e-03, -2.197e-03, 4.350e-03, 1.894e-02, -5.017e-04, 2.102e-03, 2.024e-03, 4.611e-03, -6.688e-05, -4.810e-03, -3.306e-04, 1.096e-02) * s1_2_2;
	// Per-component bias term.
	r0 += V4(1.133e-11, -1.175e-10, 3.519e-11, 1.088e-10);
	// Self-assignment: a no-op, so no clamp or activation is applied in this pass.
	r0 = r0;
	// opt: half of LUMA_pt; fpos: centre of output pixel `opos` expressed in
	// LUMA_tex coordinates.
	// NOTE(review): assumes LUMA_pt is the size of one LUMA texel in normalized
	// coordinates, making opt the size of one output pixel — confirm.
	vec2 opt = 0.5 * LUMA_pt;
	vec2 fpos = (vec2(opos) + vec2(0.5)) * opt;
	// Write the 2x2 output block: r0.x/.y/.z/.w go to offsets (0,0)/(1,0)/(0,1)/(1,1),
	// each added to the LUMA sample at the matching output-pixel centre. Only the
	// first channel carries data; the remaining channels are 0, 0 and 1.
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0.x + LUMA_tex(fpos + vec2(0.0, 0.0) * opt).r, 0.0, 0.0, 1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r0.y + LUMA_tex(fpos + vec2(1.0, 0.0) * opt).r, 0.0, 0.0, 1.0));
	imageStore(out_image, opos + ivec2(0, 1), vec4(r0.z + LUMA_tex(fpos + vec2(0.0, 1.0) * opt).r, 0.0, 0.0, 1.0));
	imageStore(out_image, opos + ivec2(1, 1), vec4(r0.w + LUMA_tex(fpos + vec2(1.0, 1.0) * opt).r, 0.0, 0.0, 1.0));
}
